diff --git a/.ci/scripts/__init__.py b/.ci/scripts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.ci/scripts/gather_benchmark_configs.py b/.ci/scripts/gather_benchmark_configs.py index a927153778..7cc9d7e136 100755 --- a/.ci/scripts/gather_benchmark_configs.py +++ b/.ci/scripts/gather_benchmark_configs.py @@ -9,8 +9,10 @@ import logging import os import re -from typing import Any, Dict +import sys +from typing import Any, Dict, List +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) from examples.models import MODEL_NAME_TO_MODEL @@ -45,6 +47,79 @@ } +def extract_all_configs(data, target_os=None): + if isinstance(data, dict): + # If target_os is specified, include "xplat" and the specified branch + include_branches = {"xplat", target_os} if target_os else data.keys() + return [ + v + for key, value in data.items() + if key in include_branches + for v in extract_all_configs(value, target_os) + ] + elif isinstance(data, list): + return [v for item in data for v in extract_all_configs(item, target_os)] + else: + return [data] + + +def generate_compatible_configs(model_name: str, target_os=None) -> List[str]: + """ + Generate a list of compatible benchmark configurations for a given model name and target OS. + + Args: + model_name (str): The name of the model to generate configurations for. + target_os (Optional[str]): The target operating system (e.g., 'android', 'ios'). + + Returns: + List[str]: A list of compatible benchmark configurations. + + Raises: + None + + Example: + generate_compatible_configs('meta-llama/Llama-3.2-1B', 'ios') -> ['llama3_fb16', 'llama3_coreml_ane'] + """ + configs = [] + if is_valid_huggingface_model_id(model_name): + if model_name.startswith("meta-llama/"): + # LLaMA models + repo_name = model_name.split("meta-llama/")[1] + if "qlora" in repo_name.lower(): + configs.append("llama3_qlora") + elif "spinquant" in repo_name.lower(): + configs.append("llama3_spinquant") + else: + configs.append("llama3_fb16") + configs.extend( + [ + config + for config in BENCHMARK_CONFIGS.get(target_os, []) + if config.startswith("llama") + ] + ) + else: + # Non-LLaMA models + configs.append("hf_xnnpack_fp32") + elif model_name in MODEL_NAME_TO_MODEL: + # ExecuTorch in-tree non-GenAI models + configs.append("xnnpack_q8") + if target_os != "xplat": + # Add OS-specific configs + configs.extend( + [ + config + for config in BENCHMARK_CONFIGS.get(target_os, []) + if not config.startswith("llama") + ] + ) + else: + # Skip unknown models with a warning + logging.warning(f"Unknown or invalid model name '{model_name}'. Skipping.") + + return configs + + def parse_args() -> Any: """ Parse command-line arguments. @@ -82,6 +157,11 @@ def comma_separated(value: str): type=comma_separated, # Use the custom parser for comma-separated values help=f"Comma-separated device names. Available devices: {list(DEVICE_POOLS.keys())}", ) + parser.add_argument( + "--configs", + type=comma_separated, # Use the custom parser for comma-separated values + help=f"Comma-separated benchmark configs. 
Available configs: {extract_all_configs(BENCHMARK_CONFIGS)}", + ) return parser.parse_args() @@ -98,11 +178,16 @@ def set_output(name: str, val: Any) -> None: set_output("benchmark_configs", {"include": [...]}) """ - if os.getenv("GITHUB_OUTPUT"): - print(f"Setting {val} to GitHub output") - with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env: - print(f"{name}={val}", file=env) - else: + github_output = os.getenv("GITHUB_OUTPUT") + if not github_output: + print(f"::set-output name={name}::{val}") + return + + try: + with open(github_output, "a") as env: + env.write(f"{name}={val}\n") + except PermissionError: + # Fall back to printing in case of permission error in unit tests print(f"::set-output name={name}::{val}") @@ -123,7 +208,7 @@ def is_valid_huggingface_model_id(model_name: str) -> bool: return bool(re.match(pattern, model_name)) -def get_benchmark_configs() -> Dict[str, Dict]: +def get_benchmark_configs() -> Dict[str, Dict]: # noqa: C901 """ Gather benchmark configurations for a given set of models on the target operating system and devices. @@ -153,48 +238,26 @@ def get_benchmark_configs() -> Dict[str, Dict]: } """ args = parse_args() - target_os = args.os devices = args.devices models = args.models + target_os = args.os + target_configs = args.configs benchmark_configs = {"include": []} for model_name in models: configs = [] - if is_valid_huggingface_model_id(model_name): - if model_name.startswith("meta-llama/"): - # LLaMA models - repo_name = model_name.split("meta-llama/")[1] - if "qlora" in repo_name.lower(): - configs.append("llama3_qlora") - elif "spinquant" in repo_name.lower(): - configs.append("llama3_spinquant") - else: - configs.append("llama3_fb16") - configs.extend( - [ - config - for config in BENCHMARK_CONFIGS.get(target_os, []) - if config.startswith("llama") - ] + configs.extend(generate_compatible_configs(model_name, target_os)) + print(f"Discovered all supported configs for model '{model_name}': {configs}") + if target_configs is not None: + for config in target_configs: + if config not in configs: + raise Exception( + f"Unsupported config '{config}' for model '{model_name}' on '{target_os}'. Skipped.\n" + f"Supported configs are: {configs}" ) - else: - # Non-LLaMA models - configs.append("hf_xnnpack_fp32") - elif model_name in MODEL_NAME_TO_MODEL: - # ExecuTorch in-tree non-GenAI models - configs.append("xnnpack_q8") - configs.extend( - [ - config - for config in BENCHMARK_CONFIGS.get(target_os, []) - if not config.startswith("llama") - ] - ) - else: - # Skip unknown models with a warning - logging.warning(f"Unknown or invalid model name '{model_name}'. 
Skipping.") - continue + configs = target_configs + print(f"Using provided configs {configs} for model '{model_name}'") # Add configurations for each valid device for device in devices: diff --git a/.ci/scripts/tests/test_gather_benchmark_configs.py b/.ci/scripts/tests/test_gather_benchmark_configs.py new file mode 100644 index 0000000000..855f815360 --- /dev/null +++ b/.ci/scripts/tests/test_gather_benchmark_configs.py @@ -0,0 +1,189 @@ +import importlib.util +import os +import subprocess +import sys +import unittest +from unittest.mock import mock_open, patch + +import pytest + +# Dynamically import the script +script_path = os.path.join(".ci", "scripts", "gather_benchmark_configs.py") +spec = importlib.util.spec_from_file_location("gather_benchmark_configs", script_path) +gather_benchmark_configs = importlib.util.module_from_spec(spec) +spec.loader.exec_module(gather_benchmark_configs) + + +@pytest.mark.skipif( + sys.platform != "linux", reason="The script under test runs on Linux runners only" +) +class TestGatehrBenchmarkConfigs(unittest.TestCase): + + def test_extract_all_configs_android(self): + android_configs = gather_benchmark_configs.extract_all_configs( + gather_benchmark_configs.BENCHMARK_CONFIGS, "android" + ) + self.assertIn("xnnpack_q8", android_configs) + self.assertIn("qnn_q8", android_configs) + self.assertIn("llama3_spinquant", android_configs) + self.assertIn("llama3_qlora", android_configs) + + def test_extract_all_configs_ios(self): + ios_configs = gather_benchmark_configs.extract_all_configs( + gather_benchmark_configs.BENCHMARK_CONFIGS, "ios" + ) + + self.assertIn("xnnpack_q8", ios_configs) + self.assertIn("coreml_fp16", ios_configs) + self.assertIn("mps", ios_configs) + self.assertIn("llama3_coreml_ane", ios_configs) + self.assertIn("llama3_spinquant", ios_configs) + self.assertIn("llama3_qlora", ios_configs) + + def test_generate_compatible_configs_llama_model(self): + model_name = "meta-llama/Llama-3.2-1B" + target_os = "ios" + result = gather_benchmark_configs.generate_compatible_configs( + model_name, target_os + ) + expected = ["llama3_fb16", "llama3_coreml_ane"] + self.assertEqual(result, expected) + + target_os = "android" + result = gather_benchmark_configs.generate_compatible_configs( + model_name, target_os + ) + expected = ["llama3_fb16"] + self.assertEqual(result, expected) + + def test_generate_compatible_configs_quantized_llama_model(self): + model_name = "meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8" + result = gather_benchmark_configs.generate_compatible_configs(model_name, None) + expected = ["llama3_spinquant"] + self.assertEqual(result, expected) + + model_name = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8" + result = gather_benchmark_configs.generate_compatible_configs(model_name, None) + expected = ["llama3_qlora"] + self.assertEqual(result, expected) + + def test_generate_compatible_configs_non_genai_model(self): + model_name = "mv2" + target_os = "xplat" + result = gather_benchmark_configs.generate_compatible_configs( + model_name, target_os + ) + expected = ["xnnpack_q8"] + self.assertEqual(result, expected) + + target_os = "android" + result = gather_benchmark_configs.generate_compatible_configs( + model_name, target_os + ) + expected = ["xnnpack_q8", "qnn_q8"] + self.assertEqual(result, expected) + + target_os = "ios" + result = gather_benchmark_configs.generate_compatible_configs( + model_name, target_os + ) + expected = ["xnnpack_q8", "coreml_fp16", "mps"] + self.assertEqual(result, expected) + + def 
test_generate_compatible_configs_unknown_model(self): + model_name = "unknown_model" + target_os = "ios" + result = gather_benchmark_configs.generate_compatible_configs( + model_name, target_os + ) + self.assertEqual(result, []) + + def test_is_valid_huggingface_model_id_valid(self): + valid_model = "meta-llama/Llama-3.2-1B" + self.assertTrue( + gather_benchmark_configs.is_valid_huggingface_model_id(valid_model) + ) + + @patch("builtins.open", new_callable=mock_open) + @patch("os.getenv", return_value=None) + def test_set_output_no_github_env(self, mock_getenv, mock_file): + with patch("builtins.print") as mock_print: + gather_benchmark_configs.set_output("test_name", "test_value") + mock_print.assert_called_with("::set-output name=test_name::test_value") + + def test_device_pools_contains_all_devices(self): + expected_devices = [ + "apple_iphone_15", + "apple_iphone_15+ios_18", + "samsung_galaxy_s22", + "samsung_galaxy_s24", + "google_pixel_8_pro", + ] + for device in expected_devices: + self.assertIn(device, gather_benchmark_configs.DEVICE_POOLS) + + def test_gather_benchmark_configs_cli(self): + args = { + "models": "mv2,dl3", + "os": "ios", + "devices": "apple_iphone_15", + "configs": None, + } + + cmd = ["python", ".ci/scripts/gather_benchmark_configs.py"] + for key, value in args.items(): + if value is not None: + cmd.append(f"--{key}") + cmd.append(value) + + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 0, f"Error: {result.stderr}") + self.assertIn('"model": "mv2"', result.stdout) + self.assertIn('"model": "dl3"', result.stdout) + self.assertIn('"config": "coreml_fp16"', result.stdout) + self.assertIn('"config": "xnnpack_q8"', result.stdout) + self.assertIn('"config": "mps"', result.stdout) + + def test_gather_benchmark_configs_cli_specified_configs(self): + args = { + "models": "mv2,dl3", + "os": "ios", + "devices": "apple_iphone_15", + "configs": "coreml_fp16,xnnpack_q8", + } + + cmd = ["python", ".ci/scripts/gather_benchmark_configs.py"] + for key, value in args.items(): + if value is not None: + cmd.append(f"--{key}") + cmd.append(value) + + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 0, f"Error: {result.stderr}") + self.assertIn('"model": "mv2"', result.stdout) + self.assertIn('"model": "dl3"', result.stdout) + self.assertIn('"config": "coreml_fp16"', result.stdout) + self.assertIn('"config": "xnnpack_q8"', result.stdout) + self.assertNotIn('"config": "mps"', result.stdout) + + def test_gather_benchmark_configs_cli_specified_configs_raise(self): + args = { + "models": "mv2,dl3", + "os": "ios", + "devices": "apple_iphone_15", + "configs": "qnn_q8", + } + + cmd = ["python", ".ci/scripts/gather_benchmark_configs.py"] + for key, value in args.items(): + if value is not None: + cmd.append(f"--{key}") + cmd.append(value) + + result = subprocess.run(cmd, capture_output=True, text=True) + self.assertEqual(result.returncode, 1, f"Error: {result.stderr}") + self.assertIn("Unsupported config 'qnn_q8'", result.stderr) + + +if __name__ == "__main__": + unittest.main() diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index f2a289e230..5d34bd8626 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -74,19 +74,27 @@ jobs: CRON_DEFAULT_DEVICES: samsung_galaxy_s22 run: | set -eux + + ARGS="--os android" + MODELS="${{ inputs.models }}" if [ -z "$MODELS" ]; then MODELS="$CRON_DEFAULT_MODELS" fi + ARGS="$ARGS --models 
$MODELS" + DEVICES="${{ inputs.devices }}" if [ -z "$DEVICES" ]; then DEVICES="$CRON_DEFAULT_DEVICES" fi + ARGS="$ARGS --devices $DEVICES" + + BENCHMARK_CONFIGS="${{ inputs.benchmark_configs }}" + if [ -n "$BENCHMARK_CONFIGS" ]; then + ARGS="$ARGS --configs $BENCHMARK_CONFIGS" + fi - PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py \ - --os "android" \ - --models $MODELS \ - --devices $DEVICES + PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS prepare-test-specs: runs-on: linux.2xlarge diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 4fb3e9711d..f6424c4fa9 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -74,19 +74,27 @@ jobs: CRON_DEFAULT_DEVICES: apple_iphone_15 run: | set -eux + + ARGS="--os ios" + MODELS="${{ inputs.models }}" if [ -z "$MODELS" ]; then MODELS="$CRON_DEFAULT_MODELS" fi + ARGS="$ARGS --models $MODELS" + DEVICES="${{ inputs.devices }}" if [ -z "$DEVICES" ]; then DEVICES="$CRON_DEFAULT_DEVICES" fi + ARGS="$ARGS --devices $DEVICES" + + BENCHMARK_CONFIGS="${{ inputs.benchmark_configs }}" + if [ -n "$BENCHMARK_CONFIGS" ]; then + ARGS="$ARGS --configs $BENCHMARK_CONFIGS" + fi - PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py \ - --os "ios" \ - --models $MODELS \ - --devices $DEVICES + PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS echo "benchmark_configs is: ${{ steps.set-parameters.outputs.benchmark_configs }}" diff --git a/pytest.ini b/pytest.ini index d0c27fdfab..1502f1749f 100644 --- a/pytest.ini +++ b/pytest.ini @@ -12,6 +12,7 @@ addopts = # For GitHub testing this is setup/executed in the unittest-arm job see .github/workflows/pull.yml for more info. --ignore-glob=backends/arm/**/* # explicitly list out tests that are running successfully in oss + .ci/scripts/tests examples/models/test devtools/ --ignore=devtools/visualization/visualization_utils_test.py