From abc587c8093e9771fbbf777de775cf70045328d9 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 09:17:46 +0100
Subject: [PATCH 1/6] remove device_map restriction

---
 optimum_benchmark/backends/pytorch/config.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/config.py b/optimum_benchmark/backends/pytorch/config.py
index ec48f639..61f9dfc0 100644
--- a/optimum_benchmark/backends/pytorch/config.py
+++ b/optimum_benchmark/backends/pytorch/config.py
@@ -5,7 +5,6 @@
 from ...system_utils import is_rocm_system
 from ..config import BackendConfig
 
-DEVICE_MAPS = ["auto", "sequential"]
 AMP_DTYPES = ["bfloat16", "float16"]
 TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"]
 
@@ -60,9 +59,6 @@ def __post_init__(self):
                 "Please remove it from the `model_kwargs` and set it in the backend config directly."
             )
 
-        if self.device_map is not None and self.device_map not in DEVICE_MAPS:
-            raise ValueError(f"`device_map` must be one of {DEVICE_MAPS}. Got {self.device_map} instead.")
-
         if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES:
             raise ValueError(f"`torch_dtype` must be one of {TORCH_DTYPES}. Got {self.torch_dtype} instead.")
 

From 1c5e33e0711e4b0035629cef9dc4ab6396917fd2 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 09:18:43 +0100
Subject: [PATCH 2/6] use auto quantization dispatcher

---
 optimum_benchmark/backends/pytorch/backend.py | 56 ++++---------
 1 file changed, 12 insertions(+), 44 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index ea8aa0a1..5b245e81 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -8,15 +8,12 @@
 from datasets import Dataset
 from safetensors.torch import save_file
 from transformers import (
-    AwqConfig,
-    BitsAndBytesConfig,
-    GPTQConfig,
-    TorchAoConfig,
     Trainer,
     TrainerCallback,
     TrainerState,
     TrainingArguments,
 )
+from transformers.quantizers import AutoQuantizationConfig
 
 from ...import_utils import is_deepspeed_available, is_torch_distributed_available, is_zentorch_available
 from ..base import Backend
@@ -286,8 +283,6 @@ def create_no_weights_model(self) -> None:
 
     def process_quantization_config(self) -> None:
         if self.is_gptq_quantized:
-            self.logger.info("\t+ Processing GPTQ config")
-
             try:
                 import exllamav2_kernels  # noqa: F401
             except ImportError:
@@ -299,12 +294,7 @@ def process_quantization_config(self) -> None:
                     "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
                 )
 
-            self.quantization_config = GPTQConfig(
-                **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
-            )
         elif self.is_awq_quantized:
-            self.logger.info("\t+ Processing AWQ config")
-
             try:
                 import exlv2_ext  # noqa: F401
             except ImportError:
@@ -316,21 +306,10 @@ def process_quantization_config(self) -> None:
                     "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
                 )
 
-            self.quantization_config = AwqConfig(
-                **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
-            )
-        elif self.is_bnb_quantized:
-            self.logger.info("\t+ Processing BitsAndBytes config")
-            self.quantization_config = BitsAndBytesConfig(
-                **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
-            )
-        elif self.is_torchao_quantized:
-            self.logger.info("\t+ Processing TorchAO config")
-            self.quantization_config = TorchAoConfig(
-                **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
-            )
-        else:
-            raise ValueError(f"Quantization scheme {self.config.quantization_scheme} not recognized")
+        self.logger.info("\t+ Processing AutoQuantization config")
+        self.quantization_config = AutoQuantizationConfig.from_dict(
+            getattr(self.pretrained_config, "quantization_config", {}).update(self.config.quantization_config)
+        )
 
     @property
     def is_quantized(self) -> bool:
@@ -339,13 +318,6 @@ def is_quantized(self) -> bool:
             and self.pretrained_config.quantization_config.get("quant_method", None) is not None
         )
 
-    @property
-    def is_bnb_quantized(self) -> bool:
-        return self.config.quantization_scheme == "bnb" or (
-            hasattr(self.pretrained_config, "quantization_config")
-            and self.pretrained_config.quantization_config.get("quant_method", None) == "bnb"
-        )
-
     @property
     def is_gptq_quantized(self) -> bool:
         return self.config.quantization_scheme == "gptq" or (
@@ -360,13 +332,6 @@ def is_awq_quantized(self) -> bool:
             and self.pretrained_config.quantization_config.get("quant_method", None) == "awq"
         )
 
-    @property
-    def is_torchao_quantized(self) -> bool:
-        return self.config.quantization_scheme == "torchao" or (
-            hasattr(self.pretrained_config, "quantization_config")
-            and self.pretrained_config.quantization_config.get("quant_method", None) == "torchao"
-        )
-
     @property
     def is_exllamav2(self) -> bool:
         return (
@@ -390,7 +355,10 @@ def automodel_kwargs(self) -> Dict[str, Any]:
         kwargs = {}
 
         if self.config.torch_dtype is not None:
-            kwargs["torch_dtype"] = getattr(torch, self.config.torch_dtype)
+            if hasattr(torch, self.config.torch_dtype):
+                kwargs["torch_dtype"] = getattr(torch, self.config.torch_dtype)
+            else:
+                kwargs["torch_dtype"] = self.config.torch_dtype
 
         if self.is_quantized:
             kwargs["quantization_config"] = self.quantization_config
@@ -436,9 +404,9 @@ def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict
 
     @torch.inference_mode()
     def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
-        assert kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1, (
-            "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
-        )
+        assert (
+            kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1
+        ), "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
         return self.pretrained_model.generate(**inputs, **kwargs)
 
     @torch.inference_mode()

From 66a7bd170bd8d8b590d48d4f8eca473e2a896f90 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 09:22:14 +0100
Subject: [PATCH 3/6] style

---
 optimum_benchmark/backends/pytorch/backend.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 5b245e81..51070d8b 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -404,9 +404,9 @@ def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict
 
     @torch.inference_mode()
     def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
-        assert (
-            kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1
-        ), "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
+        assert kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1, (
+            "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
+        )
         return self.pretrained_model.generate(**inputs, **kwargs)
 
     @torch.inference_mode()

From 660e70c8c376fcc8b2d2a0ea6e4357002a60a19e Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 11:42:18 +0100
Subject: [PATCH 4/6] fix

---
 optimum_benchmark/backends/pytorch/backend.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 51070d8b..07e45980 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
 
         self.logger.info("\t+ Processing AutoQuantization config")
         self.quantization_config = AutoQuantizationConfig.from_dict(
-            getattr(self.pretrained_config, "quantization_config", {}).update(self.config.quantization_config)
+            (getattr(self.pretrained_config, "quantization_config") or {}).update(self.config.quantization_config)
         )
 
     @property
@@ -404,9 +404,9 @@ def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict
 
     @torch.inference_mode()
     def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
-        assert kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1, (
-            "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
-        )
+        assert (
+            kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1
+        ), "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
         return self.pretrained_model.generate(**inputs, **kwargs)
 
     @torch.inference_mode()

From 27d4efb59618d715a1e443f129ce75e2ea3d6a49 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 11:55:10 +0100
Subject: [PATCH 5/6] fix

---
 optimum_benchmark/backends/pytorch/backend.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 07e45980..bb7212f7 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -308,28 +308,28 @@ def process_quantization_config(self) -> None:
 
         self.logger.info("\t+ Processing AutoQuantization config")
         self.quantization_config = AutoQuantizationConfig.from_dict(
-            (getattr(self.pretrained_config, "quantization_config") or {}).update(self.config.quantization_config)
+            dict(**getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
        )
 
     @property
     def is_quantized(self) -> bool:
         return self.config.quantization_scheme is not None or (
             hasattr(self.pretrained_config, "quantization_config")
-            and self.pretrained_config.quantization_config.get("quant_method", None) is not None
+            and self.pretrained_config.quantization_config.get("quant_method") is not None
         )
 
     @property
     def is_gptq_quantized(self) -> bool:
         return self.config.quantization_scheme == "gptq" or (
             hasattr(self.pretrained_config, "quantization_config")
-            and self.pretrained_config.quantization_config.get("quant_method", None) == "gptq"
+            and self.pretrained_config.quantization_config.get("quant_method") == "gptq"
         )
 
     @property
     def is_awq_quantized(self) -> bool:
         return self.config.quantization_scheme == "awq" or (
             hasattr(self.pretrained_config, "quantization_config")
-            and self.pretrained_config.quantization_config.get("quant_method", None) == "awq"
+            and self.pretrained_config.quantization_config.get("quant_method") == "awq"
         )
 
     @property
@@ -341,11 +341,11 @@ def is_exllamav2(self) -> bool:
                 (
                     hasattr(self.pretrained_config, "quantization_config")
                     and hasattr(self.pretrained_config.quantization_config, "exllama_config")
-                    and self.pretrained_config.quantization_config.exllama_config.get("version", None) == 2
+                    and self.pretrained_config.quantization_config.exllama_config.get("version") == 2
                 )
                 or (
                     "exllama_config" in self.config.quantization_config
-                    and self.config.quantization_config["exllama_config"].get("version", None) == 2
+                    and self.config.quantization_config["exllama_config"].get("version") == 2
                 )
             )
         )

From eaf25fdfa07a826502b3cc5f8e4302f81287f580 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 12:20:05 +0100
Subject: [PATCH 6/6] fix

---
 optimum_benchmark/backends/pytorch/backend.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index bb7212f7..dd11ddfd 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
 
         self.logger.info("\t+ Processing AutoQuantization config")
         self.quantization_config = AutoQuantizationConfig.from_dict(
-            dict(**getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
+            dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
         )
 
     @property
@@ -404,9 +404,9 @@ def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict
 
     @torch.inference_mode()
     def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
-        assert (
-            kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1
-        ), "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
+        assert kwargs.get("max_new_tokens") == kwargs.get("min_new_tokens") == 1, (
+            "For prefilling, max_new_tokens and min_new_tokens must be equal to 1"
+        )
        return self.pretrained_model.generate(**inputs, **kwargs)
 
     @torch.inference_mode()
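
A note on the quantization-config merge that patches 4-6 iterate on. The
sketch below is plain Python for illustration only; the dict keys and values
are made-up examples (only "quant_method" appears in the patches). It shows
why each intermediate form misbehaved and why the final dict(a, **b) form is
the correct merge:

    # Hypothetical stand-ins for the checkpoint's quantization_config and the
    # backend-level quantization_config overrides.
    base = {"quant_method": "gptq", "bits": 4}
    overrides = {"bits": 8}

    # Patch 2/4 shape: dict.update() mutates in place and returns None, so
    # AutoQuantizationConfig.from_dict(...) would have been called with None.
    assert (base or {}).update(overrides) is None

    # Patch 5 shape: dict(**a, **b) raises TypeError whenever the two mappings
    # share a key, because the duplicate key is passed twice as a keyword.
    try:
        dict(**{"quant_method": "gptq", "bits": 4}, **overrides)
    except TypeError:
        pass  # dict() got multiple values for keyword argument 'bits'

    # Patch 6 shape: dict(a, **b) builds a fresh dict in which the keyword
    # arguments (the backend overrides) win on key collisions.
    merged = dict({"quant_method": "gptq", "bits": 4}, **overrides)
    assert merged == {"quant_method": "gptq", "bits": 8}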