From 8a1552cedd31b981555218cbe3cecc2463051cd8 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Fri, 8 Nov 2024 20:19:09 +0000 Subject: [PATCH 01/19] Rename _vmfb to _iree --- .github/workflows/ci_eval.yaml | 12 +++---- ...{perplexity_vmfb.py => perplexity_iree.py} | 0 .../evaluate/baseline_perplexity_scores.json | 2 +- ...y_vmfb_test.py => perplexity_iree_test.py} | 34 +++++++++---------- 4 files changed, 24 insertions(+), 24 deletions(-) rename sharktank/sharktank/evaluate/{perplexity_vmfb.py => perplexity_iree.py} (100%) rename sharktank/tests/evaluate/{perplexity_vmfb_test.py => perplexity_iree_test.py} (92%) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 4c98bf79b..484f1bed1 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -21,9 +21,9 @@ concurrency: cancel-in-progress: true jobs: - test_perplexity_vmfb: + test_perplexity_iree: timeout-minutes: 1000 - name: "IREE/vmfb" + name: "Perplexity-IREE" strategy: matrix: version: [3.11] @@ -74,12 +74,12 @@ jobs: iree-base-runtime \ "numpy<2.0" - - name: Run perplexity test with vmfb - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json + - name: Run perplexity test with IREE + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json test_perplexity_torch: timeout-minutes: 1000 - name: "Torch/eager mode" + name: "Perplexity-Torch" strategy: matrix: version: [3.11] @@ -122,5 +122,5 @@ jobs: pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - - name: Run perplexity test in eager mode + - name: Run perplexity test with Torch run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json diff --git a/sharktank/sharktank/evaluate/perplexity_vmfb.py b/sharktank/sharktank/evaluate/perplexity_iree.py similarity index 100% rename from sharktank/sharktank/evaluate/perplexity_vmfb.py rename to sharktank/sharktank/evaluate/perplexity_iree.py diff --git a/sharktank/tests/evaluate/baseline_perplexity_scores.json b/sharktank/tests/evaluate/baseline_perplexity_scores.json index ac2cd7b83..24511b05f 100644 --- a/sharktank/tests/evaluate/baseline_perplexity_scores.json +++ b/sharktank/tests/evaluate/baseline_perplexity_scores.json @@ -210,7 +210,7 @@ ], "mean_perplexity": 6.060831 }, - "llama3_8B_f16_decomposed_vmfb": { + "llama3_8B_f16_decomposed_iree": { "perplexities": [ 6.651368, 22.059452, diff --git a/sharktank/tests/evaluate/perplexity_vmfb_test.py b/sharktank/tests/evaluate/perplexity_iree_test.py similarity index 92% rename from sharktank/tests/evaluate/perplexity_vmfb_test.py rename to sharktank/tests/evaluate/perplexity_iree_test.py index 93ffbe61c..8cf2055c9 100644 --- a/sharktank/tests/evaluate/perplexity_vmfb_test.py +++ b/sharktank/tests/evaluate/perplexity_iree_test.py @@ -8,7 +8,7 @@ import pytest import json -from sharktank.evaluate import perplexity_vmfb 
+from sharktank.evaluate import perplexity_iree longrun = pytest.mark.skipif("not config.getoption('longrun')") @@ -32,10 +32,10 @@ def test_llama3_8B_f16_decomposed(self): # Llama 3.1 8B decomposed - model_name = "llama3_8B_f16_decomposed_vmfb" + model_name = "llama3_8B_f16_decomposed_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_8b_f16_model}", f"--tokenizer-config-json={self.llama3_8b_tokenizer}", @@ -67,10 +67,10 @@ def test_llama3_8B_f16(self): # Llama 3.1 8B non-decomposed - model_name = "llama3_8B_f16_vmfb" + model_name = "llama3_8B_f16_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_8b_f16_model}", f"--tokenizer-config-json={self.llama3_8b_tokenizer}", @@ -102,10 +102,10 @@ def test_llama3_8B_fp8_decomposed(self): # Llama 3.1 8B decomposed - model_name = "llama3_8B_fp8_decomposed_vmfb" + model_name = "llama3_8B_fp8_decomposed_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_8b_fp8_model}", f"--tokenizer-config-json={self.llama3_8b_tokenizer}", @@ -137,10 +137,10 @@ def test_llama3_8B_fp8(self): # Llama 3.1 8B non-decomposed - model_name = "llama3_8B_fp8_vmfb" + model_name = "llama3_8B_fp8_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_8b_fp8_model}", f"--tokenizer-config-json={self.llama3_8b_tokenizer}", @@ -172,10 +172,10 @@ def test_llama3_405B_f16_decomposed(self): # Llama 3.1 405B decomposed - model_name = "llama3_405B_f16_decomposed_vmfb" + model_name = "llama3_405B_f16_decomposed_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_405b_f16_model}", f"--tokenizer-config-json={self.llama3_405b_tokenizer}", @@ -207,10 +207,10 @@ def test_llama3_405B_f16(self): # Llama 3.1 405B non-decomposed - model_name = "llama3_405B_f16_vmfb" + model_name = "llama3_405B_f16_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_405b_f16_model}", f"--tokenizer-config-json={self.llama3_405b_tokenizer}", @@ -242,10 +242,10 @@ def test_llama3_405B_fp8_decomposed(self): # Llama 3.1 405B decomposed - model_name = "llama3_405B_fp8_decomposed_vmfb" + model_name = "llama3_405B_fp8_decomposed_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_405b_fp8_model}", f"--tokenizer-config-json={self.llama3_405b_tokenizer}", @@ -277,10 +277,10 @@ def test_llama3_405B_fp8(self): # Llama 3.1 405B non-decomposed - model_name = "llama3_405B_fp8_vmfb" + model_name = "llama3_405B_fp8_iree" baseline_perplexity = self.baseline_perplexity[model_name] - current_perplexity = perplexity_vmfb.main( + current_perplexity = perplexity_iree.main( [ f"--irpa-file={self.llama3_405b_fp8_model}", f"--tokenizer-config-json={self.llama3_405b_tokenizer}", From c69d09b4f5006efdc56b1b45b4454e845ac419db Mon Sep 17 
00:00:00 2001
From: archana-ramalingam
Date: Fri, 8 Nov 2024 20:21:50 +0000
Subject: [PATCH 02/19] Add perplexity scoreboard and description

---
 sharktank/sharktank/evaluate/README.md | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/sharktank/sharktank/evaluate/README.md b/sharktank/sharktank/evaluate/README.md
index 784bb24fd..640d5e630 100644
--- a/sharktank/sharktank/evaluate/README.md
+++ b/sharktank/sharktank/evaluate/README.md
@@ -9,16 +9,31 @@ pip install -r sharktank/requirements-tests.txt

 ### Perplexity

+Perplexity score measures the ability of a language model to predict the next token in a sequence. A lower score indicates that a model has higher certainty in its predictions. Perplexity acts as an intrinsic evaluation metric that measures the model quality, independent of any downstream task.
+
+In SHARK-Platform, we use perplexity to track code regressions and quality loss across quantized models (with FP16 as baseline). We use 100 prompts from the Wikitext-2 test set and calculate the mean perplexities shown below. These numbers are neither comparable between models with different tokenizers nor with other projects due to varying implementations.
+
 Test perplexity for Llama3.1 8B & 405B (FP16 & FP8) models:

 ```bash
 pytest sharktank/tests/evaluate/perplexity_test.py --longrun
 ```

-Get perplexity for a new model:
+Calculate the perplexity for a new model:

 ```bash
 python -m sharktank.evaluate.perplexity \
   --gguf-file=llama3_70b_f16.gguf \
   --tokenizer-config-json=tokenizer_config.json
 ```
+
+### LLaMA 3.1 Scoreboard
+
+| CPU | GPU |
+|:---------------|:-----------|
+| AMD EPYC 9554 | MI300X |
+
+
+|Models |Model size (GB) |Torch |IREE |
+|:--------|:---------------|:----------|:----------|
+|8B f16 |16.07 |14.930181 |14.991893 |

From 2e286c8b33bd8b916b3753b29157c44b20e08a06 Mon Sep 17 00:00:00 2001
From: archana-ramalingam
Date: Fri, 8 Nov 2024 20:38:55 +0000
Subject: [PATCH 03/19] README updates

---
 sharktank/sharktank/evaluate/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sharktank/sharktank/evaluate/README.md b/sharktank/sharktank/evaluate/README.md
index 640d5e630..b294e233c 100644
--- a/sharktank/sharktank/evaluate/README.md
+++ b/sharktank/sharktank/evaluate/README.md
@@ -13,13 +13,13 @@ Perplexity score measures the ability of a language model to predict the next to

 In SHARK-Platform, we use perplexity to track code regressions and quality loss across quantized models (with FP16 as baseline). We use 100 prompts from the Wikitext-2 test set and calculate the mean perplexities shown below. These numbers are neither comparable between models with different tokenizers nor with other projects due to varying implementations. 
-Test perplexity for Llama3.1 8B & 405B (FP16 & FP8) models:
+* Test perplexity for Llama3.1 8B (FP16) model:

 ```bash
 pytest sharktank/tests/evaluate/perplexity_test.py --longrun
 ```

-Calculate the perplexity for a new model:
+* Calculate the perplexity for a new model:

 ```bash
 python -m sharktank.evaluate.perplexity \
@@ -30,7 +30,7 @@ python -m sharktank.evaluate.perplexity \
 ### LLaMA 3.1 Scoreboard

 | CPU | GPU |
-|:---------------|:-----------|
+|:-------------: |:----------:|
 | AMD EPYC 9554 | MI300X |


From f1b167f26c63a32994af1ca6e3fece85fe753284 Mon Sep 17 00:00:00 2001
From: archana-ramalingam
Date: Fri, 8 Nov 2024 20:39:44 +0000
Subject: [PATCH 04/19] Add perplexity to github.io dashboard

---
 .github/workflows/ci_eval.yaml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml
index 484f1bed1..66f782407 100644
--- a/.github/workflows/ci_eval.yaml
+++ b/.github/workflows/ci_eval.yaml
@@ -77,6 +77,12 @@ jobs:
     - name: Run perplexity test with IREE
       run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json

+    - name: Deploy to GitHub Pages
+      uses: peaceiris/actions-gh-pages@v3
+      with:
+        github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
+        publish_dir: ./out
+
   test_perplexity_torch:
     timeout-minutes: 1000
     name: "Perplexity-Torch"
@@ -124,3 +130,9 @@ jobs:

     - name: Run perplexity test with Torch
       run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json
+
+    - name: Deploy to GitHub Pages
+      uses: peaceiris/actions-gh-pages@v3
+      with:
+        github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
+        publish_dir: ./out

From d79236a249e7a6324f8b6ca5772c7a25fae1c120 Mon Sep 17 00:00:00 2001
From: archana-ramalingam
Date: Fri, 8 Nov 2024 20:42:44 +0000
Subject: [PATCH 05/19] README updates

---
 sharktank/sharktank/evaluate/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sharktank/sharktank/evaluate/README.md b/sharktank/sharktank/evaluate/README.md
index b294e233c..448aa60ca 100644
--- a/sharktank/sharktank/evaluate/README.md
+++ b/sharktank/sharktank/evaluate/README.md
@@ -11,7 +11,7 @@ pip install -r sharktank/requirements-tests.txt

 Perplexity score measures the ability of a language model to predict the next token in a sequence. A lower score indicates that a model has higher certainty in its predictions. Perplexity acts as an intrinsic evaluation metric that measures the model quality, independent of any downstream task.

-In SHARK-Platform, we use perplexity to track code regressions and quality loss across quantized models (with FP16 as baseline). We use 100 prompts from the Wikitext-2 test set and calculate the mean perplexities shown below. These numbers are neither comparable between models with different tokenizers nor with other projects due to varying implementations.
+In SHARK-Platform, we use perplexity to track code regressions and quality loss across quantized models (with FP16 as baseline). We use 100 prompts randomly selected from the Wikitext-2 test set and calculate the mean perplexities shown below. 
These numbers are neither comparable between models with different tokenizers nor with other projects due to varying implementations. * Test perplexity for Llama3.1 8B (FP16) model: @@ -19,7 +19,7 @@ In SHARK-Platform, we use perplexity to track code regressions and quality loss pytest sharktank/tests/evaluate/perplexity_test.py --longrun ``` -* Calculate the perplexity for a new model: +* Calculate perplexity for a new model: ```bash python -m sharktank.evaluate.perplexity \ From 16c100be8347acc300efcdbdb05512babf14c5ca Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Fri, 8 Nov 2024 20:44:41 +0000 Subject: [PATCH 06/19] README updates --- sharktank/sharktank/evaluate/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sharktank/sharktank/evaluate/README.md b/sharktank/sharktank/evaluate/README.md index 448aa60ca..d5895c221 100644 --- a/sharktank/sharktank/evaluate/README.md +++ b/sharktank/sharktank/evaluate/README.md @@ -27,7 +27,7 @@ python -m sharktank.evaluate.perplexity \ --tokenizer-config-json=tokenizer_config.json ``` -### LLaMA 3.1 Scoreboard +### LLaMA 3.1 Perplexity Scoreboard | CPU | GPU | |:-------------: |:----------:| From b5d7b0e64c3ca58b0a9a23a4e6834fed9f622819 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Sat, 9 Nov 2024 03:02:22 +0000 Subject: [PATCH 07/19] Test github pages deployment --- .github/workflows/ci_eval.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 66f782407..8f08a68aa 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,6 +7,7 @@ name: CI - Perplexity on: + pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. 
@@ -75,13 +76,13 @@ jobs: "numpy<2.0" - name: Run perplexity test with IREE - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=perplexity/perplexity_iree.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out + publish_dir: ./perplexity test_perplexity_torch: timeout-minutes: 1000 @@ -129,10 +130,10 @@ jobs: -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - name: Run perplexity test with Torch - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=perplexity/perplexity_torch.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out + publish_dir: ./perplexity From 2de02a047b791c3546531bc22f9f27c2d0548e63 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Sat, 9 Nov 2024 03:12:54 +0000 Subject: [PATCH 08/19] README.md updates --- sharktank/sharktank/evaluate/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sharktank/sharktank/evaluate/README.md b/sharktank/sharktank/evaluate/README.md index d5895c221..d655184ab 100644 --- a/sharktank/sharktank/evaluate/README.md +++ b/sharktank/sharktank/evaluate/README.md @@ -27,12 +27,13 @@ python -m sharktank.evaluate.perplexity \ --tokenizer-config-json=tokenizer_config.json ``` -### LLaMA 3.1 Perplexity Scoreboard +### Perplexity Scoreboard | CPU | GPU | |:-------------: |:----------:| | AMD EPYC 9554 | MI300X | +#### LLaMA 3.1 |Models |Model size (GB) |Torch |IREE | |:--------|:---------------|:----------|:----------| From 074bc6674f42e73aa364a46c8847e8dd338de092 Mon Sep 17 00:00:00 2001 From: Archana Ramalingam <98564406+archana-ramalingam@users.noreply.github.com> Date: Mon, 11 Nov 2024 15:04:03 -0800 Subject: [PATCH 09/19] Pin actions-gh-pages to latest hash Co-authored-by: Marius Brehler --- .github/workflows/ci_eval.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 8f08a68aa..5a0e7537d 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -133,7 +133,7 @@ jobs: run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=perplexity/perplexity_torch.html - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: 
${{ secrets.SHARK_PLATFORM_GH_TOKEN }} publish_dir: ./perplexity From f95023fc97d67740ac7706899347de8e98f3fbfa Mon Sep 17 00:00:00 2001 From: Archana Ramalingam <98564406+archana-ramalingam@users.noreply.github.com> Date: Mon, 11 Nov 2024 15:04:25 -0800 Subject: [PATCH 10/19] Pin actions-gh-pages to latest hash Co-authored-by: Marius Brehler --- .github/workflows/ci_eval.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 5a0e7537d..6a1ca40ef 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -79,7 +79,7 @@ jobs: run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=perplexity/perplexity_iree.html - name: Deploy to GitHub Pages - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} publish_dir: ./perplexity From a5a1d49a154f173035a923489298e2a598134230 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Fri, 15 Nov 2024 22:30:20 +0000 Subject: [PATCH 11/19] Remove pre-submit debug --- .github/workflows/ci_eval.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 8f08a68aa..cf18212df 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,7 +7,6 @@ name: CI - Perplexity on: - pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. 
From 8f70b33e49ba5a05feb674f8f1d97c2a0587ae64 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Sat, 16 Nov 2024 05:00:34 +0000 Subject: [PATCH 12/19] Update GH pages dir --- .github/workflows/ci_eval.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 999d921c7..96ec5b078 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -74,13 +74,13 @@ jobs: iree-base-runtime - name: Run perplexity test with IREE - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=perplexity/perplexity_iree.html + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./perplexity + publish_dir: ./iree_numerics test_perplexity_torch: timeout-minutes: 1000 @@ -128,10 +128,10 @@ jobs: -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - name: Run perplexity test with Torch - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=perplexity/perplexity_torch.html + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./perplexity + publish_dir: ./torch_numerics From 975e5a06d76501fc8d77898aeaaa19eaa92a1b42 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Sat, 16 Nov 2024 05:02:40 +0000 Subject: [PATCH 13/19] Test github pages deployment --- .github/workflows/ci_eval.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 96ec5b078..1e670f511 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,6 +7,7 @@ name: CI - Perplexity on: + pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. 
From f39d91e898427f1a6edc18c2474e55dc105cc09d Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Mon, 18 Nov 2024 20:44:32 +0000 Subject: [PATCH 14/19] Add keep_files and destination_dir --- .github/workflows/ci_eval.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 1e670f511..1eecd6be3 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -75,13 +75,15 @@ jobs: iree-base-runtime - name: Run perplexity test with IREE - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=index.html + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/numerics/iree_perplexity/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./iree_numerics + publish_dir: ./out/llm/llama/numerics/iree_perplexity + destination_dir: ./llm/llama/numerics/iree_perplexity + keep_files: true test_perplexity_torch: timeout-minutes: 1000 @@ -129,10 +131,12 @@ jobs: -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - name: Run perplexity test with Torch - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=index.html + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/numerics/torch_perplexity/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./torch_numerics + publish_dir: ./out/llm/llama/numerics/torch_perplexity + destination_dir: ./llm/llama/numerics/torch_perplexity + keep_files: true From 72b56dcfcd1fcd042af0a56792489f79e1c6e3e6 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Mon, 18 Nov 2024 21:02:44 +0000 Subject: [PATCH 15/19] Test gh-pages deployment --- sharktank/sharktank/evaluate/perplexity_iree.py | 2 +- sharktank/sharktank/evaluate/perplexity_torch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sharktank/sharktank/evaluate/perplexity_iree.py b/sharktank/sharktank/evaluate/perplexity_iree.py index 4f95ae1bd..5ad5cbbb2 100644 --- a/sharktank/sharktank/evaluate/perplexity_iree.py +++ b/sharktank/sharktank/evaluate/perplexity_iree.py @@ -183,7 +183,7 @@ def get_prompts(self): s.replace("\n", "").rstrip() for s in test_prompts if s != "" and len(s.split()) >= 20 and s.count("=") < 2 - ] + ][0:4] self.bs = len(test_prompts) diff --git a/sharktank/sharktank/evaluate/perplexity_torch.py b/sharktank/sharktank/evaluate/perplexity_torch.py index fc3aa5fca..4079c4d54 100644 --- 
a/sharktank/sharktank/evaluate/perplexity_torch.py +++ b/sharktank/sharktank/evaluate/perplexity_torch.py @@ -144,7 +144,7 @@ def get_prompts(self): s.replace("\n", "").rstrip() for s in test_prompts if s != "" and len(s.split()) >= 20 and s.count("=") < 2 - ] + ][0:4] logger.info(f" num_test_prompts: {len(test_prompts)}") From 507bc5dff0df10fbad587d4ddbea8da215e45dcb Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Mon, 18 Nov 2024 22:19:05 +0000 Subject: [PATCH 16/19] Rename numerics to perplexity --- .github/workflows/ci_eval.yaml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 1eecd6be3..e874d0247 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,7 +7,6 @@ name: CI - Perplexity on: - pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. @@ -75,14 +74,14 @@ jobs: iree-base-runtime - name: Run perplexity test with IREE - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/numerics/iree_perplexity/index.html + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --longrun --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/perplexity/iree_perplexity/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/llama/numerics/iree_perplexity - destination_dir: ./llm/llama/numerics/iree_perplexity + publish_dir: ./out/llm/llama/perplexity/iree_perplexity + destination_dir: ./llm/llama/perplexity/iree_perplexity keep_files: true test_perplexity_torch: @@ -131,12 +130,12 @@ jobs: -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - name: Run perplexity test with Torch - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/numerics/torch_perplexity/index.html + run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/perplexity/torch_perplexity/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/llama/numerics/torch_perplexity - destination_dir: ./llm/llama/numerics/torch_perplexity + publish_dir: ./out/llm/llama/perplexity/torch_perplexity + destination_dir: ./llm/llama/perplexity/torch_perplexity keep_files: true From 750c4b1efcf9edf6eb965b875a035bd5af6a5a2a Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Tue, 19 Nov 2024 03:20:08 +0000 Subject: [PATCH 17/19] Update README.md --- sharktank/sharktank/evaluate/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/sharktank/sharktank/evaluate/README.md b/sharktank/sharktank/evaluate/README.md index d655184ab..beb0281cd 100644 --- a/sharktank/sharktank/evaluate/README.md +++ b/sharktank/sharktank/evaluate/README.md @@ -35,6 +35,6 @@ python -m sharktank.evaluate.perplexity \ #### LLaMA 3.1 -|Models |Model size (GB) |Torch |IREE | -|:--------|:---------------|:----------|:----------| -|8B f16 |16.07 |14.930181 |14.991893 | +|Models |Model size (GB) |Torch score |IREE score | +|:----------------------|:---------------|:-------------|:-------------| +|8B FP16 TP1 decomposed |16.07 |14.930181 |14.991893 | From 06665144926084b7d878f1886289d8a7ff7bcd81 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Tue, 19 Nov 2024 04:27:37 +0000 Subject: [PATCH 18/19] Test gh pages deployment --- .github/workflows/ci_eval.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index e874d0247..8880f0256 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,6 +7,7 @@ name: CI - Perplexity on: + pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. From 6e83d0e57c88f92b98af7ccce85d3ff727165e0d Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 20 Nov 2024 08:54:04 +0000 Subject: [PATCH 19/19] Revert debug changes --- .github/workflows/ci_eval.yaml | 1 - .../sharktank/evaluate/perplexity_iree.py | 20 +++++++++++++------ .../sharktank/evaluate/perplexity_torch.py | 20 +++++++++++++------ sharktank/sharktank/utils/export_artifacts.py | 18 ++++++++++++----- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 8880f0256..e874d0247 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -7,7 +7,6 @@ name: CI - Perplexity on: - pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. 
diff --git a/sharktank/sharktank/evaluate/perplexity_iree.py b/sharktank/sharktank/evaluate/perplexity_iree.py index 5ad5cbbb2..9701bed34 100644 --- a/sharktank/sharktank/evaluate/perplexity_iree.py +++ b/sharktank/sharktank/evaluate/perplexity_iree.py @@ -9,6 +9,7 @@ import json import time import random +import re from datetime import timedelta from tqdm import tqdm @@ -83,11 +84,18 @@ def wrapper(*args, **kwargs): start = time.time() result = func(*args, **kwargs) end = time.time() - seconds = end - start - time_taken = abs(timedelta(seconds=round(seconds))) - - if seconds < 1: - time_taken = f" {seconds * 1000} ms" + total_seconds = end - start + time_taken = abs(timedelta(seconds=total_seconds)) + hours, minutes, seconds = re.split(":", str(time_taken)) + + if total_seconds < 1: + time_taken = f" {round(total_seconds * 1000, 3)} ms" + elif total_seconds < 60: + time_taken = "{:.2f} secs".format(round(float(total_seconds), 2)) + else: + time_taken = "{:02d} hrs : {:02d} mins : {:.2f} secs".format( + int(hours), int(minutes), round(float(seconds), 2) + ) func_name = func.__name__ if func_name == "get_perplexity": @@ -183,7 +191,7 @@ def get_prompts(self): s.replace("\n", "").rstrip() for s in test_prompts if s != "" and len(s.split()) >= 20 and s.count("=") < 2 - ][0:4] + ] self.bs = len(test_prompts) diff --git a/sharktank/sharktank/evaluate/perplexity_torch.py b/sharktank/sharktank/evaluate/perplexity_torch.py index 4079c4d54..da5fc104a 100644 --- a/sharktank/sharktank/evaluate/perplexity_torch.py +++ b/sharktank/sharktank/evaluate/perplexity_torch.py @@ -8,6 +8,7 @@ import logging import time import random +import re from datetime import timedelta import json import numpy as np @@ -69,11 +70,18 @@ def wrapper(*args, **kwargs): start = time.time() result = func(*args, **kwargs) end = time.time() - seconds = end - start - time_taken = abs(timedelta(seconds=round(seconds))) - - if seconds < 1: - time_taken = f" {seconds * 1000} ms" + total_seconds = end - start + time_taken = abs(timedelta(seconds=total_seconds)) + hours, minutes, seconds = re.split(":", str(time_taken)) + + if total_seconds < 1: + time_taken = f" {round(total_seconds * 1000, 3)} ms" + elif total_seconds < 60: + time_taken = "{:.2f} secs".format(round(float(total_seconds), 2)) + else: + time_taken = "{:02d} hrs : {:02d} mins : {:.2f} secs".format( + int(hours), int(minutes), round(float(seconds), 2) + ) func_name = func.__name__ if func_name == "get_perplexity": @@ -144,7 +152,7 @@ def get_prompts(self): s.replace("\n", "").rstrip() for s in test_prompts if s != "" and len(s.split()) >= 20 and s.count("=") < 2 - ][0:4] + ] logger.info(f" num_test_prompts: {len(test_prompts)}") diff --git a/sharktank/sharktank/utils/export_artifacts.py b/sharktank/sharktank/utils/export_artifacts.py index 9deade56c..b057730c7 100644 --- a/sharktank/sharktank/utils/export_artifacts.py +++ b/sharktank/sharktank/utils/export_artifacts.py @@ -9,6 +9,7 @@ import subprocess import logging import time +import re from pathlib import Path from datetime import timedelta from typing import List, Optional @@ -107,11 +108,18 @@ def wrapper(*args, **kwargs): start = time.time() result = func(*args, **kwargs) end = time.time() - seconds = end - start - time_taken = abs(timedelta(seconds=round(seconds))) - - if seconds < 1: - time_taken = f" {seconds * 1000} ms" + total_seconds = end - start + time_taken = abs(timedelta(seconds=total_seconds)) + hours, minutes, seconds = re.split(":", str(time_taken)) + + if total_seconds < 1: + time_taken = f" 
{round(total_seconds * 1000, 3)} ms" + elif total_seconds < 60: + time_taken = "{:.2f} secs".format(round(float(total_seconds), 2)) + else: + time_taken = "{:02d} hrs : {:02d} mins : {:.2f} secs".format( + int(hours), int(minutes), round(float(seconds), 2) + ) func_name = func.__name__ logger.info(f" {func_name}: {time_taken}")
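
---

PATCH 19 installs the same elapsed-time formatting in the timing decorators of `perplexity_iree.py`, `perplexity_torch.py`, and `export_artifacts.py`. The snippet below is a minimal standalone sketch of that logic, lifted into a `format_duration` helper so it can be exercised outside the decorators; the helper name and the sample durations are illustrative and not part of the patches.

```python
import re
import time
from datetime import timedelta


def format_duration(total_seconds: float) -> str:
    """Mirror the elapsed-time formatting added by PATCH 19's decorators."""
    time_taken = abs(timedelta(seconds=total_seconds))
    # str(timedelta) renders as "H:MM:SS[.ffffff]"; split it into fields.
    hours, minutes, seconds = re.split(":", str(time_taken))

    if total_seconds < 1:
        # Sub-second durations are reported in milliseconds.
        return f" {round(total_seconds * 1000, 3)} ms"
    elif total_seconds < 60:
        # Sub-minute durations are reported in seconds.
        return "{:.2f} secs".format(round(float(total_seconds), 2))
    else:
        # Longer durations reuse the hours/minutes/seconds split above.
        return "{:02d} hrs : {:02d} mins : {:.2f} secs".format(
            int(hours), int(minutes), round(float(seconds), 2)
        )


if __name__ == "__main__":
    start = time.time()
    time.sleep(0.05)
    print(format_duration(time.time() - start))  # e.g. " 50.123 ms"
    print(format_duration(42.5))                 # "42.50 secs"
    print(format_duration(3725.0))               # "01 hrs : 02 mins : 05.00 secs"
```

One caveat worth noting: past 24 hours, `str(timedelta)` gains a "N days," prefix that the three-way `re.split(":")` does not handle, so this formatting assumes each timed function completes within a day, which comfortably covers the CI jobs' 1000-minute timeout.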