diff --git a/README.md b/README.md
index 593f5872..a5a9d666 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,11 @@ Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. -->

- Friendli Logo
+
+
+
+ Friendli Logo
+

Supercharge Generative AI Serving with Friendli 🚀

diff --git a/friendli/cli/model.py b/friendli/cli/model.py index dff7bba3..6a50bd50 100644 --- a/friendli/cli/model.py +++ b/friendli/cli/model.py @@ -6,25 +6,12 @@ from __future__ import annotations -import os -from typing import Optional, cast - import typer -import yaml -from friendli.enums import CheckpointFileType, ModelDataType -from friendli.errors import ( - CheckpointConversionError, - InvalidConfigError, - NotFoundError, - NotSupportedQuantConfigError, - QuantizationError, -) from friendli.formatter import TableFormatter from friendli.sdk.client import Friendli -from friendli.utils.compat import model_dump, model_parse +from friendli.utils.compat import model_dump from friendli.utils.decorator import check_api -from friendli.utils.format import secho_error_and_exit app = typer.Typer( no_args_is_help=True, @@ -53,350 +40,3 @@ def list_models(): models = client.model.list() models_ = [model_dump(model) for model in iter(models)] table_formatter.render(models_) - - -@app.command() -def convert( - model_name_or_path: str = typer.Option( - ..., - "--model-name-or-path", - "-m", - help="Hugging Face pretrained model name or path to the saved model checkpoint.", - ), - output_dir: str = typer.Option( - ..., - "--output-dir", - "-o", - help=( - "Directory path to save the converted checkpoint and related configuration " - "files. Three files will be created in the directory: `model.h5`, " - "`tokenizer.json`, and `attr.yaml`. " - "The `model.h5` or `model.safetensors` is the converted checkpoint and can be renamed using " - "the `--output-model-filename` option. " - "The `tokenizer.json` is the Friendli-compatible tokenizer file, which should " - "be uploaded along with the checkpoint file to tokenize the model input " - "and output. " - "The `attr.yaml` is the checkpoint attribute file, to be used when uploading " - "the converted model to Friendli. You can designate the file name using " - "the `--output-attr-filename` option." - ), - ), - data_type: ModelDataType = typer.Option( - None, "--data-type", "-dt", help="The data type of converted checkpoint." - ), - cache_dir: Optional[str] = typer.Option( - None, "--cache-dir", help="Directory for downloading checkpoint." - ), - dry_run: bool = typer.Option( - False, "--dry-run", help="Only check conversion avaliability." - ), - output_model_file_name: str = typer.Option( - None, - "--output-model-filename", - help="Name of the converted checkpoint file." - "The default file name is `model.h5` when `--output-ckpt-file-type` is `hdf5` or `model.safetensors` when `--output-ckpt-file-type` is `safetensors`.", - ), - output_ckpt_file_type: CheckpointFileType = typer.Option( - CheckpointFileType.SAFETENSORS, - "--output-ckpt-file-type", - help="File format of the converted checkpoint file. The default output ckpt file type is `safetensors`.", - ), - output_attr_file_name: str = typer.Option( - "attr.yaml", - "--output-attr-filename", - help="Name of the checkpoint attribute file.", - ), - quantize: bool = typer.Option( - False, - "--quantize", - help="Quantize the model before conversion", - ), - quant_config_file: Optional[typer.FileText] = typer.Option( - None, - "--quant-config-file", - help="Path to the quantization configuration file.", - ), -): - """Convert huggingface's model checkpoint to Friendli format. - - When a checkpoint is in the Hugging Face format, it cannot be directly served. - It requires conversion to the Friendli format for serving. 
The conversion - process involves copying the original checkpoint and transforming it into a - checkpoint in the Friendli format (*.h5). - - :::caution - The `friendli model convert` is available only when the package is installed with - `pip install "friendli-client[mllib]"`. - ::: - - ### Apply quantization - - If you want to quantize the model along with the conversion, `--quantize` option - should be provided. You can customize the quantization configuration by describing - it in a YAML file and providing the path to the file to `--quant-config-file` - option. When `--quantize` option is used without providing `--quant-config-file`, - the following configuration is used by default. - - ```yaml - # Default quantization configuration - mode: awq - device: cuda:0 - seed: 42 - offload: true - calibration_dataset: - path_or_name: lambada - format: json - split: validation - lookup_column_name: text - num_samples: 128 - max_length: 512 - batch_size: 1 - awq_args: - quant_bit: 4 - quant_group_size: 64 - ``` - - - **`mode`**: Quantization scheme to apply. Defaults to "awq". - - **`device`**: Device to run the quantization process. Defaults to "cuda:0". - - **`seed`**: Random seed. Defaults to 42. - - **`offload`**: When enabled, this option significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to true. - - **`calibration_dataset`** - - **`path_or_name`**: Path or name of the dataset. Datasets from either the Hugging Face Datasets Hub or local file system can be used. Defaults to "lambada". - - **`format`**: Format of datasets. Defaults to "json". - - **`split`**: Which split of the data to load. Defaults to "validation". - - **`lookup_column_name`**: The name of a column in the dataset to be used as calibration inputs. Defaults to "text". - - **`num_samples`**: The number of dataset samples to use for calibration. Note that the dataset will be shuffled before sampling. Defaults to 512. - - **`max_length`**: The maximum length of a calibration input sequence. Defauts to 512. - - **`batch_size`**: The number of samples to process in a single batch. Defaults to 1. - - **`awq_args`** (Fill in this field only for "awq" mode) - - **`quant_bit`** : Bit width of integers to represent weights. Possible values are `4` or `8`. Defaults to 4. - - **`quant_group_size`**: Group size of quantized matrices. 64 is the only supported value at this time. Defaults to 64. - - :::tip - If you encounter OOM issues when running with AWQ, try enabling the `offload` option. - ::: - - :::tip - If you set `percentile` in quant-config-file into 100, - the quantization range will be determined by the maximum absolute values of the activation tensors. - ::: - - :::info - Currently, [AWQ](https://arxiv.org/abs/2306.00978) is the only supported quantization scheme. 
- ::: - - :::info - AWQ is supported only for models with architecture listed as follows: - - - `GPTNeoXForCausalLM` - - `GPTJForCausalLM` - - `LlamaForCausalLM` - - `MPTForCausalLM` - ::: - - """ - # pylint: disable=too-many-branches - try: - # pylint: disable=import-outside-toplevel - from friendli.modules.converter.convert import convert_checkpoint - from friendli.modules.quantizer.schema.config import ( - AWQConfig, - OneOfQuantConfig, - QuantConfig, - ) - from friendli.modules.quantizer_v2.quantize import quantize_checkpoint - from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig - - # pylint: enable=import-outside-toplevel - except ModuleNotFoundError as exc: - secho_error_and_exit(str(exc)) - - if not os.path.isdir(output_dir): - if os.path.exists(output_dir): - secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.") - os.mkdir(output_dir) - - quant_config: Optional[OneOfQuantConfig] = None - use_quantizer_v2 = False - if quantize: - if quant_config_file: - try: - quant_config_dict = cast(dict, yaml.safe_load(quant_config_file.read())) - except yaml.YAMLError as err: - secho_error_and_exit(f"Failed to load the quant config file: {err}") - if quant_config_dict["mode"] == "int8": - quant_config = model_parse( # type: ignore - Int8QuantConfig, quant_config_dict - ) - else: - quant_config = model_parse( - QuantConfig, {"config": quant_config_dict} - ).config - - # TODO(SA): All Quantization mode will be migrated to V2. After migration, please remove it. - else: - quant_config = AWQConfig() - - if isinstance(quant_config, Int8QuantConfig): - use_quantizer_v2 = True - - default_names = { - CheckpointFileType.HDF5: "model.h5", - CheckpointFileType.SAFETENSORS: "model.safetensors", - } - output_model_file_name = ( - output_model_file_name or default_names[output_ckpt_file_type] - ) - - if use_quantizer_v2: - if output_ckpt_file_type == CheckpointFileType.HDF5: - secho_error_and_exit( - f"int8 quantization only supports `safetensors` output_ckpt_file_type. Current output_ckpt_file_type: {output_ckpt_file_type}" - ) - try: - assert isinstance(quant_config, Int8QuantConfig) - quantize_checkpoint( - model_name_or_path=model_name_or_path, - output_dir=output_dir, - cache_dir=cache_dir, - dry_run=dry_run, - quant_config=quant_config, - ) - except (NotFoundError, QuantizationError, NotSupportedQuantConfigError) as exc: - secho_error_and_exit(str(exc)) - else: - try: - convert_checkpoint( - model_name_or_path=model_name_or_path, - output_model_file_name=output_model_file_name, - output_ckpt_file_type=output_ckpt_file_type, - output_attr_file_name=output_attr_file_name, - output_dir=output_dir, - data_type=data_type, - cache_dir=cache_dir, - dry_run=dry_run, - quantize=quantize, - quant_config=quant_config, - ) - except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc: - secho_error_and_exit(str(exc)) - - msg = ( - f"Checkpoint({model_name_or_path}) can be converted." - if dry_run - else f"Checkpoint({model_name_or_path}) has been converted successfully." - ) - typer.secho(msg) - - -@app.command() -def convert_adapter( - adapter_name_or_path: str = typer.Option( - ..., - "--adapter-name-or-path", - "-a", - help="Hugging Face pretrained adapter name or path to the saved adapter checkpoint.", - ), - output_dir: str = typer.Option( - ..., - "--output-dir", - "-o", - help=( - "Directory path to save the converted adapter checkpoint and related configuration " - "files. 
Two files will be created in the directory: `adapter.h5`, " - "and `attr.yaml`. " - "The `adapter.h5` is the converted checkpoint and can be renamed using " - "the `--output-adapter-filename` option. " - "The `attr.yaml` is the adapter checkpoint attribute file, to be used when uploading " - "the converted model to Friendli. You can designate the file name using " - "the `--output-attr-filename` option." - ), - ), - data_type: ModelDataType = typer.Option( - None, "--data-type", "-dt", help="The data type of converted checkpoint." - ), - base_model_name_or_path: Optional[str] = typer.Option( - None, - "--base-model-name-or-path", - "-b", - help=( - "Hugging Face model name or path to the saved backbone checkpoint. " - "By default, we use the `base_model_name_or_path` in adapter_config.json." - ), - ), - cache_dir: Optional[str] = typer.Option( - None, "--cache-dir", help="Directory for downloading checkpoint." - ), - dry_run: bool = typer.Option( - False, "--dry-run", help="Only check conversion avaliability." - ), - output_adapter_filename: str = typer.Option( - "adapter.h5", - "--output-adapter-filename", - help="Name of the converted adapter checkpoint file.", - ), - output_attr_filename: str = typer.Option( - "adapter_attr.yaml", - "--output-attr-filename", - help="Name of the adapter checkpoint attribute file.", - ), -) -> None: - """Convert huggingface's adapter checkpoint to Friendli format. - - When an adapter checkpoint is in the Hugging Face PEFT format, it cannot - be directly served in Friendli. It requires conversion to the Friendli format. - The conversion process involves copying the original adapter checkpoint and - transforming it into a checkpoint in the Friendli format (*.h5). - - This function does not include the `friendli model convert` command. i.e. - `friendli model convert-adapter` only converts adapter's parameters, not backbone's. - - :::caution - The `friendli model convert-adapter` is available only when the package is installed with - `pip install "friendli-client[mllib]"`. - ::: - - """ - try: - from friendli.modules.converter.convert import ( # pylint: disable=import-outside-toplevel - convert_adapter_checkpoint, - ) - except ModuleNotFoundError as exc: - secho_error_and_exit(str(exc)) - - if not os.path.isdir(output_dir): - if os.path.exists(output_dir): - secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.") - os.mkdir(output_dir) - - # Engine cannot load a Safetensors Lora ckpt yet. - output_adapter_file_type = CheckpointFileType.HDF5 - default_names = { - CheckpointFileType.HDF5: "adapter.h5", - CheckpointFileType.SAFETENSORS: "adapter.safetensors", - } - output_adapter_filename = ( - output_adapter_filename or default_names[output_adapter_file_type] - ) - - try: - convert_adapter_checkpoint( - adapter_name_or_path=adapter_name_or_path, - output_attr_filename=output_attr_filename, - output_dir=output_dir, - output_adapter_filename=output_adapter_filename, - base_model_name_or_path=base_model_name_or_path, - data_type=data_type, - output_adapter_file_type=output_adapter_file_type, - cache_dir=cache_dir, - dry_run=dry_run, - ) - except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc: - secho_error_and_exit(str(exc)) - - msg = ( - f"Checkpoint({adapter_name_or_path}) can be converted." - if dry_run - else f"Checkpoint({adapter_name_or_path}) has been converted successfully." 
- ) - typer.secho(msg) diff --git a/friendli/modules/__init__.py b/friendli/modules/__init__.py deleted file mode 100644 index e603ace1..00000000 --- a/friendli/modules/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli modules.""" diff --git a/friendli/modules/converter/__init__.py b/friendli/modules/converter/__init__.py deleted file mode 100644 index d0213cf4..00000000 --- a/friendli/modules/converter/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli model converter.""" diff --git a/friendli/modules/converter/base.py b/friendli/modules/converter/base.py deleted file mode 100644 index 9eaca2ec..00000000 --- a/friendli/modules/converter/base.py +++ /dev/null @@ -1,560 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Converter.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from collections.abc import Generator -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union, cast - -import numpy as np -import torch -from peft import PeftType # type: ignore[import] # pylint: disable=import-error -from peft.config import PeftConfig -from peft.tuners.lora import ( # type: ignore[import] # pylint: disable=import-error - LoraConfig, -) -from transformers import GenerationConfig, PretrainedConfig # type: ignore[import] - -from friendli.enums import ModelDataType -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.interface import ( - DecoderTFBlockConversionInterface, - EncoderTFBlockConversionInterface, - ModelConversionInterface, - NonTFBlockConversionInterface, -) -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import get_model_data_type - -SUPPORTED_GELU_FAMILY = [ - "gelu", - "gelu_fast", - "gelu_new", - "gelu_python", - "gelu_pytorch_tanh", - "gelu_accurate", -] -SUPPORTED_HEAD_SIZE = [64, 80, 96, 128, 256] - -MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP = { - "gptj": {"q_proj", "k_proj", "v_proj", "out_proj", "fc_in", "fc_out", "wte"}, - "llama": { - "q_proj", - "k_proj", - "v_proj", - "o_proj", - "gate_proj", - "up_proj", - "down_proj", - }, - "mistral": { - "q_proj", - "k_proj", - "v_proj", - "o_proj", - "gate_proj", - "up_proj", - "down_proj", - }, - "mpt": {"Wqkv", "out_proj", "up_proj", "down_proj"}, -} -# TODO: remove this const map when engine supports lm head LoRA -MODEL_TYPE_TO_UNSUPPORTED_LORA_TARGET_MODULES_MAP = { - "gptj": {"lm_head"}, - "llama": {"lm_head"}, - "mistral": {"lm_head"}, - "mpt": {"lm_head"}, -} - -ENCODER_PREFIX = "encoder" -DECODER_PREFIX = "decoder" - - -class AbstractConverter(ModelConversionInterface, ABC): - """Abstract class for converting Hugging Face checkpoint to Friendli checkpoint. - - Attributes: - config (PreTrainedConfig): Hugging Face model configuration. - generation_config (Optional[GenerationConfig]): Hugginface generation config. - When set to None, `config` is used for configuring generation. - data_type (Optional(ModelDataType)): Data type for the Friendli checkpoint. 
- - """ - - def __init__( - self, - config: PretrainedConfig, - generation_config: Optional[GenerationConfig], - data_type: Optional[ModelDataType], - ) -> None: - """Initialize converter.""" - self.config = config - self.generation_config = generation_config - self.data_type = ( - data_type if data_type else get_model_data_type(config.torch_dtype) - ) - - def get_eos_token_id(self) -> Optional[int]: - """Get ID of EOS token.""" - generation_eos_token_id = None - if self.generation_config is not None: - generation_eos_token_id = self.generation_config.eos_token_id - - config_eos_token_id = self.config.eos_token_id - - if generation_eos_token_id is None: - eos_token_id = config_eos_token_id - else: - if generation_eos_token_id != config_eos_token_id: - logger.warn( - "'eos_token' is different in generation_config (%s) and config (%s). " - "Please fill the correct value.", - generation_eos_token_id, - config_eos_token_id, - ) - eos_token_id = None - else: - eos_token_id = config_eos_token_id - - if eos_token_id is None: - logger.warn( - "'eos_token' cannot be automatically configured. " - "Please fill in the field by yourself." - ) - - return eos_token_id - - def token_embed_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape embedding layer's weight to Friendli format. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped embedding weight. - - """ - assert len(params) == 1 - return params[0] - - def pos_embed_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape position embedding layer's weight to Friendli format. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped position embedding weight. - """ - assert len(params) == 1 - return params[0] - - def head_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape head layer's weight to Friendli format. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped head weight. - - """ - assert len(params) == 1 - return params[0] - - def linear_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape linear layer's weight to Friendli format. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped linear weight. - - """ - assert len(params) == 1 - param = params[0].transpose(0, 1) - return param - - def linear_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape linear layer's bias to Friendli format. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped linear bias. 
- - """ - assert len(params) == 1 - return params[0] - - def ln_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape layer norm layer's weight to Friendli format. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped layer norm weight. - - """ - assert len(params) == 1 - return params[0] - - def ln_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape layer norm layer's bias to Friendli format. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped layer norm bias. - - """ - assert len(params) == 1 - return params[0] - - def qkv_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape qkv layer's weight to Friendli format. - - In the original checkpoint, the qkv weight is stored as a single tensor or - separated by three tensors. In the Friendli checkpoint, it is stored as a single tensor. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped qkv weight. - - """ - param = torch.cat(params, dim=0) - param = param.transpose(0, 1) - return param - - def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape qkv layer's bias to Friendli format. - - In the original checkpoint, the qkv weight is stored as a single tensor or - separated by three tensors. In the Friendli checkpoint, it is stored as a single tensor. - - Args: - state_dict (Dict[str, torch.Tensor]): The state_dict of the original checkpoint. - layer (str): The layer name of the original checkpoint. - per_layer_postfixes (List[str]): The list of postfixes of the layer. - - Returns: - The tensor of reshaped qkv bias. 
- - """ - param = torch.cat(params, dim=0) - return param - - -class DecoderOnlyConverter( - AbstractConverter, - NonTFBlockConversionInterface, - DecoderTFBlockConversionInterface, -): - """Converter for Decoder-Only models.""" - - def check_config(self) -> None: - """Check if a convertible form of the checkpoint from the decoder-only model config.""" - super().check_config() - if self.decoder_head_size not in SUPPORTED_HEAD_SIZE: - raise NotSupportedCheckpointError( - invalid_option=f"decoder_head_size={self.decoder_head_size}", - valid_options=SUPPORTED_HEAD_SIZE, - ) - - def get_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Get List of conversion informations for Decoder-Only model.""" - return self.non_transformer_convert_info_list + self.decoder_convert_info_list - - -class EncoderDecoderConverter( - AbstractConverter, - NonTFBlockConversionInterface, - EncoderTFBlockConversionInterface, - DecoderTFBlockConversionInterface, -): - """Converter for Encoder-Decoder models.""" - - def check_config(self) -> None: - """Check if a convertible form of the checkpoint from the encoder-decoder model config.""" - if self.decoder_head_size not in SUPPORTED_HEAD_SIZE: - raise NotSupportedCheckpointError( - invalid_option=f"decoder_head_size={self.decoder_head_size}", - valid_options=SUPPORTED_HEAD_SIZE, - ) - - def get_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Get list of conversion informations for Encoder-Decoder model.""" - return ( - self.non_transformer_convert_info_list - + self.decoder_convert_info_list - + self.encoder_convert_info_list - ) - - def get_decoder_start_token_id(self) -> Optional[int]: - """Get ID of decoder start token.""" - generation_decoder_start_token_id = None - if self.generation_config is not None: - generation_decoder_start_token_id = ( - self.generation_config.decoder_start_token_id - ) - - config_decoder_start_token_id = self.config.decoder_start_token_id - - if generation_decoder_start_token_id is None: - decoder_start_token_id = config_decoder_start_token_id - else: - if generation_decoder_start_token_id != config_decoder_start_token_id: - logger.warn( - "'decoder_start_token_id' is different in generation_config " - "(%s) and config (%s). Please fill the correct value.", - generation_decoder_start_token_id, - config_decoder_start_token_id, - ) - decoder_start_token_id = None - else: - decoder_start_token_id = config_decoder_start_token_id - - if decoder_start_token_id is None: - logger.warn( - "'decoder_start_token' cannot be automatically configured. " - "Please fill in the field by yourself." 
- ) - - return decoder_start_token_id - - -class DecoderOnlyLoraConverter(AbstractConverter): - """Converter for LoRA modules in the models.""" - - def __init__( - self, - converter: AbstractConverter, - adapter_config: PeftConfig, - ) -> None: - """Initialize LoRA Converter.""" - super().__init__( - config=converter.config, - generation_config=converter.generation_config, - data_type=converter.data_type, - ) - self.converter = cast(DecoderOnlyConverter, converter) - self.adapter_config = cast(LoraConfig, adapter_config) - - def check_config(self) -> None: - """Check if a convertible form of the checkpoint from the LoRAconfig.""" - if self.adapter_config.peft_type != PeftType.LORA: - raise NotSupportedCheckpointError( - invalid_option=f"peft_type={self.adapter_config.peft_type}", - valid_options=[str(PeftType.LORA)], - ) - if ( - self.config.model_type - not in MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP - ): - raise NotSupportedCheckpointError( - invalid_option=f"model_type={self.config.model_type} for LORA", - valid_options=list( - MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP.keys() - ), - ) - if ( - self.adapter_config.layers_pattern is not None - and len(self.adapter_config.layers_pattern) > 0 - ): - raise NotSupportedCheckpointError( - invalid_option=f"layers_pattern={self.adapter_config.layers_pattern}", - valid_options=[None, [], ""], - ) - if ( - self.adapter_config.rank_pattern is not None - and len(self.adapter_config.rank_pattern) > 0 - ): - raise NotSupportedCheckpointError( - invalid_option=f"rank_pattern={self.adapter_config.rank_pattern}", - valid_options=[None, {}], - ) - if ( - self.adapter_config.alpha_pattern is not None - and len(self.adapter_config.alpha_pattern) > 0 - ): - raise NotSupportedCheckpointError( - invalid_option=f"alpha_pattern={self.adapter_config.alpha_pattern}", - valid_options=[None, {}], - ) - - if self.adapter_config.target_modules is not None: - for target_module in self.adapter_config.target_modules: - if ( - target_module - not in MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP[ - self.config.model_type - ] - ): - if ( - target_module - in MODEL_TYPE_TO_UNSUPPORTED_LORA_TARGET_MODULES_MAP[ - self.config.model_type - ] - ): - raise NotSupportedCheckpointError( - invalid_option=f"target_module={target_module}", - valid_options=list( - MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP[ - self.config.model_type - ] - ), - ) - - logger.warn( - "Target module %s does not exist in the base model (%s). 
Will be ignored.", - target_module, - self.adapter_config.base_model_name_or_path, - ) - - if (self.adapter_config.layers_to_transform is not None) and ( - self.adapter_config != list(range(self.converter.decoder_layer_num)) - ): - raise NotSupportedCheckpointError( - invalid_option=f"layers_to_transform={self.adapter_config.layers_to_transform}", - valid_options=[ - f"layers_to_transform=None" - f"layers_to_transform={list(range(self.converter.decoder_layer_num))}", - ], - ) - - def get_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Get convert dict for LoRA model.""" - return self.adapter_convert_info_list - - def _get_layers_to_transform(self) -> List[int]: - layers_to_transform = cast(LoraConfig, self.adapter_config).layers_to_transform - if layers_to_transform is None: - layers_to_transform = list(range(self.converter.decoder_layer_num)) - else: - if isinstance(layers_to_transform, int): - layers_to_transform = [layers_to_transform] - return layers_to_transform - - def lora_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """Reshape LoRA layer's weight to Friendli format.""" - assert len(params) == 1 - return params[0].transpose(0, 1) - - def pre_convert(self, model: torch.nn.Module) -> torch.nn.Module: - """Preprocess the adapter modules before converting. - - All the parameters of the LoRA low-rank matrixs are converted by `lora_weight_reshape`. - If the parameter can't be converted by `lora_weight_reshape`, - - """ - return model - - def convert( # pylint: disable=too-many-locals - self, - model: torch.nn.Module, - convert_info_list: List[ConvertInfo], - save_numpy_format: bool = True, - ) -> Generator[Tuple[str, Union[np.ndarray, torch.Tensor]], None, None]: - """Reshape Lora adapter model's all layer to Friendli format.""" - model = self.pre_convert(model) - yield from self.converter.convert(model, convert_info_list, save_numpy_format) - - def get_attributes(self) -> Dict[str, Any]: - """Get adapter checkpoint attributes.""" - return { - "name": "FILL ME", - "type": "lora", - "alpha": self.adapter_config.lora_alpha, - "rank": self.adapter_config.r, - "target-modules": list(self.adapter_target_modules), - "ckpt-path": "FILL ME", - } - - @property - def adapter_target_modules(self) -> Set[str]: - """Return the target modules that LoRA applies to.""" - if isinstance(self.adapter_config.target_modules, str): - hf_target_modules = {self.adapter_config.target_modules} - elif isinstance(self.adapter_config.target_modules, Iterable): - hf_target_modules = set(self.adapter_config.target_modules) - else: - raise CheckpointConversionError("`target_modules` should not be None") - - translated_target_modules = set() - for target in hf_target_modules: - if target in self.adapter_target_module_map: - translated_target_modules.add(self.adapter_target_module_map[target]) - - return translated_target_modules - - @property - @abstractmethod - def adapter_target_module_map(self) -> Dict[str, str]: - """Return the dictionary that maps Hugging Face's module name to Friendli's module name.""" - - @property - @abstractmethod - def adapter_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for LoRA modules of the model.""" - - -OneOfAdapterConverter = DecoderOnlyLoraConverter -OneOfConverter = Union[EncoderDecoderConverter, DecoderOnlyConverter] - - -class FP8OnlyConverter(DecoderOnlyConverter): - """FP8Only Architectures Converter Class.""" - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint 
attributes.""" - raise NotImplementedError("Not supported in FP8 Conversion.") - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks.""" - raise NotImplementedError("Not supported in FP8 Conversion.") - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks.""" - raise NotImplementedError("Not supported in FP8 Conversion.") diff --git a/friendli/modules/converter/convert.py b/friendli/modules/converter/convert.py deleted file mode 100644 index 4e4338e0..00000000 --- a/friendli/modules/converter/convert.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Converter.""" - -from __future__ import annotations - -import os -from typing import Optional - -import yaml -from peft import PeftModel # type: ignore[import] # pylint: disable=import-error - -from friendli.enums import CheckpointFileType, ModelDataType, QuantMode -from friendli.errors import TokenizerNotFoundError -from friendli.logging import logger -from friendli.modules.converter.saver import get_saver -from friendli.utils.validate import validate_convert_imports - -validate_convert_imports() -# pylint: disable=import-outside-toplevel, wrong-import-position, wrong-import-order, ungrouped-imports -import torch # type: ignore[import] -from accelerate import init_empty_weights # type: ignore[import] - -from friendli.modules.converter.maps import ( - get_adapter_converter_factory, - get_hf_converter_factory, -) -from friendli.modules.converter.utils import ( - get_adapter_config, - get_model_arch, - get_model_generation_config, - get_model_pretrained_config, - get_torch_data_type, - save_tokenizer, -) -from friendli.modules.quantizer.maps import get_quantized_converter -from friendli.modules.quantizer.schema.config import OneOfQuantConfig - -# pylint: enable=import-outside-toplevel, wrong-import-position, wrong-import-order, ungrouped-imports - - -def convert_checkpoint( # pylint: disable=too-many-branches - model_name_or_path: str, - output_model_file_name: str, - output_attr_file_name: str, - output_dir: str, - output_ckpt_file_type: CheckpointFileType, - *, - data_type: Optional[ModelDataType] = None, - cache_dir: Optional[str] = None, - dry_run: bool = False, - quantize: bool = False, - quant_config: Optional[OneOfQuantConfig] = None, -) -> None: - """Convert HuggingFace model checkpoint to Friendli format. - - Args: - model_name_or_path (str): Hugging Face model name or local path to the checkpoint. - output_model_file_name (str): File name of converted checkpoint to save. - output_attr_file_name (str): File name of the attribute YAML file for - the converted checkpoint. - output_dir (str) : Directory path to save the converted checkpoint and the attribute YAML, - and tokenizer configuration file. - output_ckpt_file_type (CheckpointFileType): The file type of converted checkpoint. - data_type (Optional[ModelDataType]): Converted checkpoint data type. - Defaults to torch_dtype in 'config.json' - attr_output_path (Optional[str], optional): Path to create the attribute YAML file for - the converted checkpoint. Defaults to None. - cache_dir (Optional[str], optional): Path for downloading checkpoint. Defaults to None. - dry_run (bool, optional): Check only if checkpoint is convertable. Defaults to False. - quantize (bool, optional): Enable quantization. Defaults to False. 
- quant_config (Optional[OneOfQuantConfig], optional): Quantization configuration. - Defaults to None. - - Raises: - InValidconfigError: Raised when data_type is not supported. - NotFoundError: Raised when `model_name_or_path` or `tokenizer_output_dir` is not found. - NotSupportedCheckpointError: Raised when model architecture is not supported to convert. - - """ - # pylint: disable=too-many-locals - model_output_path = os.path.join(output_dir, output_model_file_name) - model_config = get_model_pretrained_config( - model_name_or_path, model_output_path, cache_dir - ) - generation_config = get_model_generation_config(model_name_or_path, cache_dir) - - model_arch = get_model_arch(model_config) - hf_factory, converter_factory = get_hf_converter_factory(model_arch) - converter = converter_factory( - config=model_config, - generation_config=generation_config, - data_type=data_type, - ) - - if quantize: - assert quant_config is not None - # common quantization only supports `.safetensors`` output format. - if quant_config.mode == QuantMode.FP8: - assert output_ckpt_file_type == CheckpointFileType.SAFETENSORS - converter = get_quantized_converter( # type: ignore[assignment] - quant_config, converter - ) - - converter.check_config() - - if not dry_run: - logger.info( - "Start loading Hugging Face checkpoint(%s) for conversion...", - model_name_or_path, - ) - model = hf_factory.from_pretrained( - model_name_or_path, - torch_dtype=model_config.torch_dtype, - cache_dir=cache_dir, - trust_remote_code=True, - low_cpu_mem_usage=True, - # `low_cpu_mem_usage` is for model loading faster and using ~1x model size CPU memory. - # https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel.from_pretrained.example - ) - - logger.info( - "Hugging Face checkpoint(%s) is successfully loaded!", - model_name_or_path, - ) - - convert_info_list = converter.get_convert_info_list() - with get_saver( - output_ckpt_file_type, output_dir, output_model_file_name - ) as saver: - for name, w in converter.convert( - model, - convert_info_list, - output_ckpt_file_type == CheckpointFileType.HDF5, - ): - saver.save_tensor(name, w) - - logger.info( - "Hugging Face checkpoint(%s) is successfully converted to Friendli format!", - model_name_or_path, - ) - - # Save attr.yaml - attr_output_path = os.path.join(output_dir, output_attr_file_name) - if quant_config and quant_config.mode == QuantMode.FP8 and ModelDataType.FP8_E4M3: - model_config.torch_dtype = ( - get_torch_data_type(data_type) if data_type else model_config.torch_dtype - ) - setattr(model_config, "use_fp8_e4m3", True) - model_config.to_json_file(os.path.join(output_dir, "config.json")) - else: - attr = converter.get_attributes() - with open(attr_output_path, "w", encoding="utf-8") as file: - yaml.dump(attr, file, sort_keys=False) - - # Save tokenizer files. 
- tokenizer_output_dir = output_dir - try: - saved_tokenizer_file_paths = save_tokenizer( - model_name_or_path=model_name_or_path, - cache_dir=cache_dir, - save_dir=tokenizer_output_dir, - ) - except TokenizerNotFoundError as exc: - logger.warn(str(exc)) - - if not ( - quant_config and quant_config.mode == QuantMode.FP8 and ModelDataType.FP8_E4M3 - ): - for path in saved_tokenizer_file_paths: - if "tokenizer.json" not in path: - try: - os.remove(path) - except FileNotFoundError: - logger.warn( - "Tried to delete unnecessary tokenizer file %s but the file " - "is not found.", - path, - ) - - -def convert_adapter_checkpoint( # pylint: disable=too-many-locals, too-many-arguments - adapter_name_or_path: str, - output_attr_filename: str, - output_dir: str, - output_adapter_filename: str, - base_model_name_or_path: Optional[str], - data_type: Optional[ModelDataType], - output_adapter_file_type: CheckpointFileType, - cache_dir: Optional[str], - dry_run: bool = False, -) -> None: - """Convert HuggingFace model checkpoint to Friendli format.""" - adapter_attr_output_path = os.path.join(output_dir, output_attr_filename) - adapter_config = get_adapter_config(adapter_name_or_path, cache_dir) - base_model_name_or_path = ( - base_model_name_or_path or adapter_config.base_model_name_or_path - ) - model_config = get_model_pretrained_config( - base_model_name_or_path, - adapter_attr_output_path, - cache_dir, - ) - model_arch = get_model_arch(model_config) - hf_factory, converter_factory = get_hf_converter_factory(model_arch) - converter = converter_factory( - config=model_config, - generation_config=None, - data_type=data_type, - ) - adapter_converter = get_adapter_converter_factory(model_arch)( - converter, adapter_config - ) - adapter_converter.check_config() - - if not dry_run: - logger.info( - "Start loading Hugging Face adapter checkpoint(%s's %s) for conversion...", - base_model_name_or_path, - adapter_name_or_path, - ) - with init_empty_weights(): - model = hf_factory.from_pretrained( - base_model_name_or_path, - torch_dtype=torch.float32, - cache_dir=cache_dir, - trust_remote_code=True, - low_cpu_mem_usage=True, - ) - # inplace model update - PeftModel.from_pretrained( - model, adapter_name_or_path, cache_dir=cache_dir, torch_dtype=torch.float32 - ) - logger.info( - "Hugging Face adapter checkpoint (%s) is successfully loaded!", - adapter_name_or_path, - ) - convert_dict = adapter_converter.get_convert_info_list() - with get_saver( - output_adapter_file_type, output_dir, output_adapter_filename - ) as saver: - for name, w in adapter_converter.convert( - model, convert_dict, output_adapter_file_type == CheckpointFileType.HDF5 - ): - saver.save_tensor(name, w) - - logger.info( - "Hugging Face checkpoint (%s) is successfully converted to Friendli format!", - adapter_name_or_path, - ) - - attr = adapter_converter.get_attributes() - with open(adapter_attr_output_path, "w", encoding="utf-8") as file: - yaml.dump([attr], file, sort_keys=False) diff --git a/friendli/modules/converter/interface.py b/friendli/modules/converter/interface.py deleted file mode 100644 index 6e7db352..00000000 --- a/friendli/modules/converter/interface.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Model Converter Interface.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from collections.abc import Generator -from typing import Any, Dict, List, Tuple, Union - -import numpy as np -import torch -from tqdm import tqdm - -from friendli.enums import ModelDataType -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import ( - convert_tensor_dtype, - get_tensor_from_state_dict, -) - - -class ModelConversionInterface(ABC): - """Interface get information for converting models.""" - - @abstractmethod - def get_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Get list of conversion informations for the model.""" - - @abstractmethod - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - - @abstractmethod - def check_config(self) -> None: - """Check if the model is convertable.""" - - def convert( - self, - model: torch.nn.Module, - convert_info_list: List[ConvertInfo], - save_numpy_format: bool = True, - ) -> Generator[Tuple[str, Union[np.ndarray, torch.Tensor]], None, None]: - """Convert Huggingface Model to Friendli format(.h5). - - Args: - model (torch.nn.Module): Huggingface model. - output_path (str): Path to save the converted checkpoint. - convert_info_list (List[ConvertInfo]): - List of convert information of the parameter in huggingface checkpoint. - save_numpy_format (bool, optional): Save the converted tensor in numpy format. - Defaults to True. - """ - state_dict = model.state_dict() - total_layers = len(convert_info_list) - with tqdm(total=total_layers, desc="Converting", unit="tensor") as pbar: - for convert_info in convert_info_list: - converted_name, reshape_fn, param_names, data_type = ( - convert_info.converted_name, - convert_info.reshape_fn, - convert_info.param_names, - convert_info.data_type, - ) - params = [ - get_tensor_from_state_dict(state_dict, param_name) - for param_name in param_names - ] - reshaped_tensor = convert_tensor_dtype(reshape_fn(params), data_type) - if save_numpy_format: - yield ( - converted_name, - reshaped_tensor.view(torch.float16).numpy().view(np.uint16) - if data_type == ModelDataType.BF16 - else reshaped_tensor.numpy(), - ) - else: - yield ( - converted_name, - reshaped_tensor.contiguous(), - ) - - pbar.update() - - -class NonTFBlockConversionInterface(ABC): - """Interface get information for converting common layers.""" - - @property - @abstractmethod - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for the non-transformer blocks.""" - - -class DecoderTFBlockConversionInterface(ABC): - """Interface get information for converting decoder layers.""" - - @property - @abstractmethod - def decoder_layer_prefix(self) -> str: - """Return the layer name prefix used before the decoder's transformer block number.""" - - @property - @abstractmethod - def decoder_layer_num(self) -> int: - """Return the number of transformer blocks in the decoder.""" - - @property - @abstractmethod - def decoder_hidden_size(self) -> int: - """Return the hidden size of the decoder.""" - - @property - @abstractmethod - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads.""" - - @property - @abstractmethod - def decoder_num_attention_heads(self) -> int: - """Return the number of attention heads in the decoder.""" - - @property - @abstractmethod - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of 
conversion informations for transformer blocks in the decoder.""" - - @property - @abstractmethod - def decoder_head_size(self) -> int: - """Return the head size of the decoder.""" - - @property - @abstractmethod - def decoder_ff_intermediate_size(self) -> int: - """Return the intermediate size of the linear layer in decoder's MLP.""" - - -class EncoderTFBlockConversionInterface(ABC): - """Interface get information for converting encoder layers.""" - - @property - @abstractmethod - def encoder_layer_prefix(self) -> str: - """Return the layer name prefix used before the encoder's transformer block number.""" - - @property - @abstractmethod - def encoder_layer_num(self) -> int: - """Return the number of transformer blocks in the encoder.""" - - @property - @abstractmethod - def encoder_hidden_size(self) -> int: - """Return the hidden size of the encoder.""" - - @property - @abstractmethod - def encoder_num_attention_heads(self) -> int: - """Return the number of attention heads in the encoder.""" - - @property - @abstractmethod - def encoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for transformer blocks in the encoder.""" - - @property - @abstractmethod - def encoder_head_size(self) -> int: - """Return the head size of the encoder.""" - - @property - @abstractmethod - def encoder_ff_intermediate_size(self) -> int: - """Return the intermediate size of the linear layer in encoder's MLP.""" - - -class RotaryEmbeddingConversionInterface(ABC): - """Interface get information for converting rotary embeddings.""" - - @property - @abstractmethod - def rotary_dim(self) -> int: - """Return the dimension of rotary embeddings.""" - - @property - @abstractmethod - def rotary_emb_base(self) -> float: - """Return the base of rotary embeddings.""" diff --git a/friendli/modules/converter/maps.py b/friendli/modules/converter/maps.py deleted file mode 100644 index 7a8bcd37..00000000 --- a/friendli/modules/converter/maps.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Defining Friendli Model Converter maps.""" - -from __future__ import annotations - -from typing import Dict, Tuple, Type, Union - -from transformers import ( # type: ignore[import] - AutoModelForCausalLM, - BlenderbotForConditionalGeneration, - BloomForCausalLM, - CodeGenForCausalLM, - CohereForCausalLM, - DbrxForCausalLM, - FalconForCausalLM, - GPT2LMHeadModel, - GPTJForCausalLM, - GPTNeoXForCausalLM, - LlamaForCausalLM, - MistralForCausalLM, - MixtralForCausalLM, - MptForCausalLM, - OPTForCausalLM, - Phi3ForCausalLM, - PreTrainedModel, - T5ForConditionalGeneration, -) - -from friendli.errors import NotSupportedCheckpointError -from friendli.modules.converter.base import OneOfAdapterConverter, OneOfConverter -from friendli.modules.converter.models.arctic import ArcticForCausalLMConverter -from friendli.modules.converter.models.blenderbot import BlenderbotConverter -from friendli.modules.converter.models.bloom import BloomForCausalLMConverter -from friendli.modules.converter.models.codegen import CodegenForCausalLMConverter -from friendli.modules.converter.models.cohere import CohereForCausalLMConverter -from friendli.modules.converter.models.dbrx import DbrxForCausalLMConverter -from friendli.modules.converter.models.falcon import FalconForCausalLMConverter -from friendli.modules.converter.models.gpt2 import GPT2LMHeadModelConverter -from friendli.modules.converter.models.gpt_neox import GPTNeoXForCausalLMConverter -from friendli.modules.converter.models.gptj import ( - GPTJForCausalLMConverter, - GPTJForCausalLMLoraConverter, -) -from friendli.modules.converter.models.llama import ( - LlamaForCausalLMConverter, - LlamaForCausalLMLoraConverter, -) -from friendli.modules.converter.models.mistral import ( - MistralForCausalLMConverter, - MistralForCausalLMLoraConverter, -) -from friendli.modules.converter.models.mixtral import MixtralForCausalLMConverter -from friendli.modules.converter.models.mpt import ( - MPTForCausalLMConverter, - MptForCausalLMLoraConverter, -) -from friendli.modules.converter.models.opt import OPTForCausalLMConverter -from friendli.modules.converter.models.phi3 import Phi3ForCausalLMConverter -from friendli.modules.converter.models.phi_msft import PhiForCausalLMConverter -from friendli.modules.converter.models.t5 import T5Converter - -MODEL_ARCH_CONVERTER_MAP: Dict[ - str, Tuple[Union[PreTrainedModel, PreTrainedModel], Type[OneOfConverter]] -] = { - "BlenderbotForConditionalGeneration": ( - BlenderbotForConditionalGeneration, - BlenderbotConverter, - ), - "BloomForCausalLM": (BloomForCausalLM, BloomForCausalLMConverter), - "CodeGenForCausalLM": (CodeGenForCausalLM, CodegenForCausalLMConverter), - "FalconForCausalLM": (FalconForCausalLM, FalconForCausalLMConverter), - "GPTNeoXForCausalLM": (GPTNeoXForCausalLM, GPTNeoXForCausalLMConverter), - "GPT2LMHeadModel": (GPT2LMHeadModel, GPT2LMHeadModelConverter), - "GPTJForCausalLM": (GPTJForCausalLM, GPTJForCausalLMConverter), - "LlamaForCausalLM": (LlamaForCausalLM, LlamaForCausalLMConverter), - "LLaMAForCausalLM": (LlamaForCausalLM, LlamaForCausalLMConverter), - "MistralForCausalLM": (MistralForCausalLM, MistralForCausalLMConverter), - "MixtralForCausalLM": (MixtralForCausalLM, MixtralForCausalLMConverter), - "MPTForCausalLM": (MptForCausalLM, MPTForCausalLMConverter), - "OPTForCausalLM": (OPTForCausalLM, OPTForCausalLMConverter), - "T5ForConditionalGeneration": (T5ForConditionalGeneration, T5Converter), - "PhiForCausalLM": (AutoModelForCausalLM, PhiForCausalLMConverter), - "CohereForCausalLM": (CohereForCausalLM, 
CohereForCausalLMConverter), - "DbrxForCausalLM": (DbrxForCausalLM, DbrxForCausalLMConverter), - "Phi3ForCausalLM": (Phi3ForCausalLM, Phi3ForCausalLMConverter), - "ArcticForCausalLM": (AutoModelForCausalLM, ArcticForCausalLMConverter), -} - -MODEL_ARCH_ADAPTER_CONVERTER_MAP: Dict[ - str, - Type[OneOfAdapterConverter], -] = { - "GPTJForCausalLM": GPTJForCausalLMLoraConverter, - "LlamaForCausalLM": LlamaForCausalLMLoraConverter, - "LLaMAForCausalLM": LlamaForCausalLMLoraConverter, - "MPTForCausalLM": MptForCausalLMLoraConverter, - "MistralForCausalLM": MistralForCausalLMLoraConverter, -} - - -def get_hf_converter_factory( - model_arch: str, -) -> Tuple[PreTrainedModel, Type[OneOfConverter]]: - """Return the converter factory for the given model architecture. - - Args: - model_arch (str): Model architecture name. - - Returns: - Tuple[PretrainedModel, Type[OneOfConverter]]: Tuple of - model class and converter class. - - Raises: - NotSupportedCheckpointError: Raised when the given model architecture is not supported. - - """ - if model_arch not in MODEL_ARCH_CONVERTER_MAP: - raise NotSupportedCheckpointError( - invalid_option=f"Model architecture='{model_arch}'", - valid_options=list(MODEL_ARCH_CONVERTER_MAP.keys()), - ) - - return MODEL_ARCH_CONVERTER_MAP[model_arch] - - -def get_adapter_converter_factory( - model_arch: str, -) -> Type[OneOfAdapterConverter]: - """Return the converter factory for the given model architecture. - - Args: - model_arch (str): Model architecture name. - - Returns: - Type[LoraConverter]: Adapter Converter class. - - Raises: - NotSupportedCheckpointError: Raised when the given model architecture is not supported. - """ - try: - adapter_converter_type = MODEL_ARCH_ADAPTER_CONVERTER_MAP[model_arch] - except KeyError as exc: - raise NotSupportedCheckpointError( - invalid_option=f"adapter for model architecture='{model_arch}'", - valid_options=list(MODEL_ARCH_ADAPTER_CONVERTER_MAP.keys()), - ) from exc - return adapter_converter_type diff --git a/friendli/modules/converter/models/arctic.py b/friendli/modules/converter/models/arctic.py deleted file mode 100644 index 293d21d9..00000000 --- a/friendli/modules/converter/models/arctic.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Arctic Checkpoint Converter.""" - - -from __future__ import annotations - -from typing import cast - -from transformers import PretrainedConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import FP8OnlyConverter -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface - - -class ArcticConfig(PretrainedConfig): - r""" - This is the configuration class to store the configuration of a [`ArcticModel`]. It is used to instantiate an - Arctic model according to the specified arguments, defining the model architecture. Instantiating a configuration - with the defaults will yield a similar configuration to that of the #TODO(rsamdani): add what model has the default config.. - Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the - documentation from [`PretrainedConfig`] for more information. - Args: - vocab_size (`int`, *optional*, defaults to 32000): - Vocabulary size of the Arctic model. 
Defines the number of different tokens that can be represented by the - `inputs_ids` passed when calling [`ArcticModel`] - hidden_size (`int`, *optional*, defaults to 4096): - Dimension of the hidden representations. - intermediate_size (`int`, *optional*, defaults to 14336): - Dimension of the MLP representations. - num_hidden_layers (`int`, *optional*, defaults to 32): - Number of hidden layers in the Transformer encoder. - num_attention_heads (`int`, *optional*, defaults to 32): - Number of attention heads for each attention layer in the Transformer encoder. - num_key_value_heads (`int`, *optional*, defaults to 8): - This is the number of key_value heads that should be used to implement Grouped Query Attention. If - `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if - `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When - converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed - by meanpooling all the original heads within that group. For more details checkout [this - paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`. - hidden_act (`str` or `function`, *optional*, defaults to `"silu"`): - The non-linear activation function (function or string) in the decoder. - max_position_embeddings (`int`, *optional*, defaults to `4096*32`): - The maximum sequence length that this model might ever be used with. Arctic's sliding window attention - allows sequence of up to 4096*32 tokens. - initializer_range (`float`, *optional*, defaults to 0.02): - The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - rms_norm_eps (`float`, *optional*, defaults to 1e-05): - The epsilon used by the rms normalization layers. - use_cache (`bool`, *optional*, defaults to `True`): - Whether or not the model should return the last key/values attentions (not used by all models). Only - relevant if `config.is_decoder=True`. - pad_token_id (`int`, *optional*): - The id of the padding token. - bos_token_id (`int`, *optional*, defaults to 1): - The id of the "beginning-of-sequence" token. - eos_token_id (`int`, *optional*, defaults to 2): - The id of the "end-of-sequence" token. - tie_word_embeddings (`bool`, *optional*, defaults to `False`): - Whether the model's input and output word embeddings should be tied. - rope_theta (`float`, *optional*, defaults to 1000000.0): - The base period of the RoPE embeddings. - sliding_window (`int`, *optional*): - Sliding window attention window size. If not specified, will default to `4096`. - attention_dropout (`float`, *optional*, defaults to 0.0): - The dropout ratio for the attention probabilities. - num_experts_per_tok (`int`, *optional*, defaults to 2): - The number of experts to root per-token, can be also interpreted as the `top-p` routing - parameter - num_local_experts (`int`, *optional*, defaults to 8): - Number of experts per Sparse MLP layer. - router_aux_loss_coef (`float`, *optional*, defaults to 0.001): - The aux loss factor for the total loss. - ```python - >>> from transformers import ArcticModel, ArcticConfig - >>> # Initializing a Arctic 7B style configuration TODO(rsamdani): verify which model does the default configuration correspond to. 
- >>> configuration = ArcticConfig() - >>> # Initializing a model from the Arctic 7B style configuration - >>> model = ArcticModel(configuration) - >>> # Accessing the model configuration - >>> configuration = model.config - ```""" - - model_type = "arctic" - keys_to_ignore_at_inference = ["past_key_values"] - - def __init__( - self, - vocab_size=32000, - hidden_size=4096, - intermediate_size=14336, - num_hidden_layers=32, - num_attention_heads=32, - num_key_value_heads=None, - hidden_act="silu", - max_position_embeddings=4096, - initializer_range=0.02, - rms_norm_eps=1e-5, - use_cache=True, - pad_token_id=None, - bos_token_id=1, - eos_token_id=2, - tie_word_embeddings=False, - rope_theta=1e6, - sliding_window=None, - attention_dropout=0.0, - num_experts_per_tok=1, - num_local_experts=8, - router_aux_loss_coef=0.001, - moe_layer_frequency=2, - parallel_attn_mlp_res=False, - moe_train_capacity_factor=1, - moe_eval_capacity_factor=1, - enable_expert_tensor_parallelism=False, - moe_min_capacity=0, - moe_token_dropping=True, - **kwargs, - ): - self.vocab_size = vocab_size - self.max_position_embeddings = max_position_embeddings - self.hidden_size = hidden_size - self.intermediate_size = intermediate_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.sliding_window = sliding_window - - # for backward compatibility - if num_key_value_heads is None: - num_key_value_heads = num_attention_heads - - self.num_key_value_heads = num_key_value_heads - self.hidden_act = hidden_act - self.initializer_range = initializer_range - self.rms_norm_eps = rms_norm_eps - self.use_cache = use_cache - self.rope_theta = rope_theta - self.attention_dropout = attention_dropout - - self.num_experts_per_tok = num_experts_per_tok - self.num_local_experts = num_local_experts - self.router_aux_loss_coef = router_aux_loss_coef - self.moe_layer_frequency = moe_layer_frequency - self.moe_train_capacity_factor = moe_train_capacity_factor - self.moe_eval_capacity_factor = moe_eval_capacity_factor - self.enable_expert_tensor_parallelism = enable_expert_tensor_parallelism - self.moe_min_capacity = moe_min_capacity - self.moe_token_dropping = moe_token_dropping - self.parallel_attn_mlp_res = parallel_attn_mlp_res - super().__init__( - pad_token_id=pad_token_id, - bos_token_id=bos_token_id, - eos_token_id=eos_token_id, - tie_word_embeddings=tie_word_embeddings, - **kwargs, - ) - - -class ArcticForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface): - """ArcticForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Arctic architectures' config can be converted to Friendli format.""" - super().check_config() - config = cast(ArcticConfig, self.config) - try: - if config.tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if config.hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={config.hidden_act}'", - valid_options=["silu"], - ) - if config.moe_layer_frequency != 1: - raise NotSupportedCheckpointError( - invalid_option=f"'moe_layer_frequency={config.moe_layer_frequency}'", - valid_options=[1], - ) - if not config.parallel_attn_mlp_res: - raise NotSupportedCheckpointError( - invalid_option=f"'parallel_attn_mlp_res={config.parallel_attn_mlp_res}'", - valid_options=[True], - ) - - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - @property - def 
model_type(self) -> str: - """Model type.""" - return "arctic" - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before Arctic's transformer block number.""" - return "model.layers." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in Arctic.""" - return cast(ArcticConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in Arctic.""" - return cast(ArcticConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in Arctic.""" - return cast(ArcticConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in Arctic.""" - config = cast(ArcticConfig, self.config) - if config.num_key_value_heads is None: - return self.decoder_num_attention_heads - return config.num_key_value_heads - - @property - def decoder_head_size(self) -> int: - """The head size of Arctic.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in Arctic MLP.""" - return cast(ArcticConfig, self.config).intermediate_size - - @property - def rotary_dim(self) -> int: - """The rotary embedding dimension of Arctic.""" - return self.decoder_head_size - - @property - def rotary_emb_base(self) -> float: - """The rotary embedding base of Arctic.""" - return cast(ArcticConfig, self.config).rope_theta - - @property - def num_experts(self) -> int: - """The number of moe experts per transformer block in Arctic.""" - return cast(ArcticConfig, self.config).num_local_experts - - @property - def num_selected_moe_experts(self) -> int: - """The number of selected moe experts per transformer block in Arctic.""" - return cast(ArcticConfig, self.config).num_experts_per_tok diff --git a/friendli/modules/converter/models/blenderbot.py b/friendli/modules/converter/models/blenderbot.py deleted file mode 100644 index 224ded48..00000000 --- a/friendli/modules/converter/models/blenderbot.py +++ /dev/null @@ -1,472 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
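As a side note on the Arctic MoE attributes above (`num_local_experts` and `num_experts_per_tok`, exposed by the converter as `num_experts` and `num_selected_moe_experts`): the sketch below is a minimal, illustrative top-k router, not code from this repository, intended only to show what those two numbers control.

```python
import torch

# Illustrative values mirroring the ArcticConfig defaults shown above.
num_local_experts = 8      # experts per sparse MLP layer
num_experts_per_tok = 2    # top-k experts selected for each token

hidden_size = 16
tokens = torch.randn(4, hidden_size)                      # 4 tokens
router = torch.nn.Linear(hidden_size, num_local_experts, bias=False)

logits = router(tokens)                                   # (4, num_local_experts)
weights, expert_ids = torch.topk(
    logits.softmax(dim=-1), k=num_experts_per_tok, dim=-1
)
weights = weights / weights.sum(dim=-1, keepdim=True)     # renormalize selected experts

print(expert_ids)  # which experts each token is routed to
print(weights)     # per-token mixing weights for the selected experts
```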
- -"""Friendli Blenderbot Checkpoint Converter.""" - -from __future__ import annotations - -import math -from typing import Any, Dict, List, cast - -import numpy as np -import torch -from transformers import BlenderbotConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - ENCODER_PREFIX, - SUPPORTED_GELU_FAMILY, - EncoderDecoderConverter, -) -from friendli.modules.converter.schema import ConvertInfo - - -class BlenderbotConverter(EncoderDecoderConverter): - """BlenderbotForConditionalGeneration Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Blenderbot architectures's config can be converted to Friendli format.""" - super().check_config() - config = cast(BlenderbotConfig, self.config) - try: - if config.activation_function not in SUPPORTED_GELU_FAMILY: - raise NotSupportedCheckpointError( - invalid_option="'activation_function=" - f"{cast(BlenderbotConfig, self.config).activation_function}'", - valid_options=SUPPORTED_GELU_FAMILY, - ) - if not config.tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=False'", - valid_options=[True], - ) - if self.encoder_num_attention_heads != self.decoder_num_attention_heads: - raise NotSupportedCheckpointError( - invalid_option=( - f"encoder_num_attention_heads={self.encoder_num_attention_heads} " - f"decoder_num_attention_heads={self.decoder_num_attention_heads}" - ), - valid_options=[ - "encoder_num_attention_heads == decoder_num_attention_heads" - ], - ) - if config.decoder_ffn_dim != config.encoder_ffn_dim: - raise NotSupportedCheckpointError( - invalid_option=( - f"encoder_ffn_dim={config.encoder_ffn_dim} " - f"decoder_ffn_dim={config.decoder_ffn_dim}" - ), - valid_options=["encoder_ffn_dim == decoder_ffn_dim"], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def token_embed_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """Reshape token embedding weight for Blenderbot's embedding layer.""" - assert len(params) == 1 - embed_dim = cast(BlenderbotConfig, self.config).d_model - embed_scale = ( - math.sqrt(embed_dim) - if cast(BlenderbotConfig, self.config).scale_embedding - else 1.0 - ) - embed_weight = params[0] - embed_weight = embed_weight * embed_scale - return embed_weight - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(BlenderbotConfig, self.config) - - logger.warn( - "Since Blenderbot uses absolute position embedding, 'max_input_length' and " - "'max_output_length' cannot be larger than %d.", - config.max_position_embeddings, - ) - - eos_token_id = self.get_eos_token_id() - decoder_start_token_id = self.get_decoder_start_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.encoder_head_size, - "num_heads": self.encoder_num_attention_heads, - "hidden_size": self.encoder_hidden_size, - "ff_intermediate_size": self.decoder_ff_intermediate_size, - "num_encoder_layers": self.encoder_layer_num, - "num_decoder_layers": self.decoder_layer_num, - "max_input_length": config.max_position_embeddings, - "max_output_length": config.max_position_embeddings, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "decoder_start_token": ( - decoder_start_token_id - if decoder_start_token_id 
is not None - else "FILL ME" - ), - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "blenderbot" - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in Blenderbot.""" - return [ - ConvertInfo( - param_names=["model.shared.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["model.shared.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ConvertInfo( - param_names=["model.encoder.embed_positions.weight"], - data_type=self.data_type, - converted_name=f"{ENCODER_PREFIX}/wpe/weight:0", - reshape_fn=self.pos_embed_weight_reshape, - ), - ConvertInfo( - param_names=["model.decoder.embed_positions.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/wpe/weight:0", - reshape_fn=self.pos_embed_weight_reshape, - ), - ConvertInfo( - param_names=["model.encoder.layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{ENCODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["model.encoder.layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{ENCODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["model.decoder.layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["model.decoder.layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ] - - @property - def encoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in Blenderbot's encoder.""" - convert_info_list = [] - for i in range(self.encoder_layer_num): - layer_prefix = f"{self.encoder_layer_prefix}{i}." 
- converted_prefix = f"{ENCODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}self_attn_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.weight", - f"{layer_prefix}self_attn.k_proj.weight", - f"{layer_prefix}self_attn.v_proj.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.bias", - f"{layer_prefix}self_attn.k_proj.bias", - f"{layer_prefix}self_attn.v_proj.bias", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/bias:0", - reshape_fn=self.qkv_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.out_proj.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}final_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}final_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc1.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc2.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc2.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ] - ) - return convert_info_list - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in Blenderbot's decoder.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}self_attn_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.weight", - f"{layer_prefix}self_attn.k_proj.weight", - f"{layer_prefix}self_attn.v_proj.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.bias", - f"{layer_prefix}self_attn.k_proj.bias", - f"{layer_prefix}self_attn.v_proj.bias", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/bias:0", - reshape_fn=self.qkv_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.out_proj.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}encoder_attn_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}encoder_attn_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}final_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_3/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}final_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_3/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}encoder_attn.q_proj.weight", - f"{layer_prefix}encoder_attn.k_proj.weight", - f"{layer_prefix}encoder_attn.v_proj.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}cross_attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}encoder_attn.q_proj.bias", - f"{layer_prefix}encoder_attn.k_proj.bias", - f"{layer_prefix}encoder_attn.v_proj.bias", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}cross_attn/c_attn/bias:0", - reshape_fn=self.qkv_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}encoder_attn.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}cross_attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}encoder_attn.out_proj.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}cross_attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - 
param_names=[f"{layer_prefix}fc1.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc2.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc2.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ] - ) - return convert_info_list - - @property - def encoder_layer_prefix(self) -> str: - """The layer name prefix used before Blenderbot encoder's transformer block number.""" - return "model.encoder.layers." - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before Blenderbot decoder's transformer block number.""" - return "model.decoder.layers." - - @property - def encoder_layer_num(self) -> int: - """The number of transformer blocks in Blenderbot encoder.""" - return cast(BlenderbotConfig, self.config).encoder_layers - - @property - def encoder_hidden_size(self) -> int: - """The hidden size of Blenderbot encoder.""" - return cast(BlenderbotConfig, self.config).d_model - - @property - def encoder_num_attention_heads(self) -> int: - """The number of attention heads of Blenderbot encoder.""" - return cast(BlenderbotConfig, self.config).encoder_attention_heads - - @property - def encoder_head_size(self) -> int: - """The size of each attention head of Blenderbot encoder.""" - return self.encoder_hidden_size // self.encoder_num_attention_heads - - @property - def encoder_ff_intermediate_size(self) -> int: - """The intermediate of the linear layer in Blenderbot encoder's MLP.""" - return cast(BlenderbotConfig, self.config).encoder_ffn_dim - - @property - def decoder_layer_num(self) -> int: - """The number of transformer blocks in Blenderbot decoder.""" - return cast(BlenderbotConfig, self.config).decoder_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size of Blenderbot decoder.""" - return cast(BlenderbotConfig, self.config).d_model - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads of Blenderbot decoder.""" - return cast(BlenderbotConfig, self.config).decoder_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads of blenderbot decoder.""" - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The size of each attention head of Blenderbot decoder.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate of the linear layer in Blenderbot decoder's MLP.""" - return cast(BlenderbotConfig, self.config).decoder_ffn_dim diff --git a/friendli/modules/converter/models/bloom.py b/friendli/modules/converter/models/bloom.py deleted file mode 100644 index 7ce615ad..00000000 --- a/friendli/modules/converter/models/bloom.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Bloom Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import numpy as np -import torch -from transformers import BloomConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import DECODER_PREFIX, DecoderOnlyConverter -from friendli.modules.converter.schema import ConvertInfo - - -class BloomForCausalLMConverter(DecoderOnlyConverter): - """BloomForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Bloom architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(BloomConfig, self.config).apply_residual_connection_post_layernorm: - raise NotSupportedCheckpointError( - invalid_option="apply_residual_connection_post_layernorm=True", - valid_options=[False], - ) - if cast(BloomConfig, self.config).slow_but_exact: - raise NotSupportedCheckpointError( - invalid_option="slow_but_exact=True", valid_options=[False] - ) - if not cast(BloomConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="tie_word_embeddings=False", valid_options=[True] - ) - if cast(BloomConfig, self.config).layer_norm_epsilon != 1e-5: - raise NotSupportedCheckpointError( - invalid_option="layer_norm_epsilon=" - f"{cast(BloomConfig, self.config).layer_norm_epsilon}", - valid_options=[1e-5], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def qkv_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """qkv_weight_reshape for Bloom's attention layer.""" - assert len(params) == 1 - qkv_weight = params[0] - split_qkv_weight_list = torch.split(qkv_weight, self.decoder_head_size, dim=0) - qkv_weight_list = [ - torch.cat( - [ - split_qkv_weight_list[j * 3 + i] - for j in range(self.decoder_num_attention_heads) - ], - dim=0, - ).reshape(-1, self.decoder_hidden_size) - for i in range(3) - ] - - qkv_weight = torch.cat(qkv_weight_list, dim=0).transpose(0, 1) - return qkv_weight - - def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """qkv_bias_reshape for Bloom's attention layer.""" - assert len(params) == 1 - qkv_bias = params[0] - split_qkv_bias_list = torch.split(qkv_bias, self.decoder_head_size, dim=0) - qkv_bias_list = [ - torch.cat( - [ - split_qkv_bias_list[j * 3 + i] - for j in range(self.decoder_num_attention_heads) - ], - dim=0, - ) - for i in range(3) - ] - - qkv_bias = torch.cat(qkv_bias_list, dim=0) - return qkv_bias - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(BloomConfig, self.config) - - logger.warn( - "The 'max_length' field is left blank as it cannot be automatically configured. " - "You must determine the 'max_length' according to your needs. The Bloom model does " - "not rely on absolute position embeddings, allowing you to choose any " - "suitable value." 
- ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "num_heads": self.decoder_num_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": "FILL ME", - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "bloom" - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in Bloom.""" - return [ - ConvertInfo( - param_names=["transformer.word_embeddings.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.word_embeddings_layernorm.weight"], - data_type=self.data_type, - converted_name="wte/ln/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.word_embeddings_layernorm.bias"], - data_type=self.data_type, - converted_name="wte/ln/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ] - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in Bloom.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attention.query_key_value.bias" - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/bias:0", - reshape_fn=self.qkv_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attention.dense.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}post_attention_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}post_attention_layernorm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_h_to_4h.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_4h_to_h.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attention.query_key_value.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attention.dense.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_h_to_4h.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_4h_to_h.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - return convert_info_list - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before Bloom's transformer block number.""" - return "transformer.h." 
- - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in Bloom.""" - return cast(BloomConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """Return the hidden size in Bloom.""" - return cast(BloomConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in Bloom.""" - return cast(BloomConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in bloom.""" - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The size of each attention head in Bloom.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in Bloom MLP.""" - return self.decoder_hidden_size * 4 diff --git a/friendli/modules/converter/models/codegen.py b/friendli/modules/converter/models/codegen.py deleted file mode 100644 index a6f1ef03..00000000 --- a/friendli/modules/converter/models/codegen.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli CodeGen Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import torch -from transformers import CodeGenConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - SUPPORTED_GELU_FAMILY, - DecoderOnlyConverter, -) -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface -from friendli.modules.converter.schema import ConvertInfo - - -class CodegenForCausalLMConverter( - DecoderOnlyConverter, RotaryEmbeddingConversionInterface -): - """CodegenForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if CodeGen architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if ( - cast(CodeGenConfig, self.config).activation_function - not in SUPPORTED_GELU_FAMILY - ): - raise NotSupportedCheckpointError( - invalid_option="'activation_function=" - f"{cast(CodeGenConfig, self.config).activation_function}'", - valid_options=SUPPORTED_GELU_FAMILY, - ) - if cast(CodeGenConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if cast(CodeGenConfig, self.config).layer_norm_epsilon != 1e-5: - raise NotSupportedCheckpointError( - invalid_option="'layer_norm_epsilon=" - f"{cast(CodeGenConfig, self.config).layer_norm_epsilon}'", - valid_options=[1e-5], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def qkv_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """qkv_weight_reshape for CodeGen's attention layer.""" - assert len(params) == 1 - original_qkv_weight = params[0] - reshaped_qkv_weight = original_qkv_weight.reshape( - (4, original_qkv_weight.size(0) // 4, original_qkv_weight.size(1)) - ) - q_weight, v_weight, k_weight = torch.split( - reshaped_qkv_weight, reshaped_qkv_weight.size(1) // 3, dim=1 - ) - q_weight = q_weight.reshape((-1, q_weight.size(2))) - k_weight = k_weight.reshape((-1, k_weight.size(2))) - v_weight = v_weight.reshape((-1, 
v_weight.size(2))) - - qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0) - qkv_weight = qkv_weight.transpose(0, 1) - - return qkv_weight - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(CodeGenConfig, self.config) - - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. The CodeGen model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - config.n_positions, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": config.n_positions, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "rope_theta": self.rotary_emb_base, - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "gpt-j" - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in CodeGen.""" - return [ - ConvertInfo( - param_names=["transformer.wte.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["lm_head.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ConvertInfo( - param_names=["lm_head.bias"], - data_type=self.data_type, - converted_name="head_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ] - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in CodeGen.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ln_1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_1.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_in.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_out.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.qkv_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_in.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_out.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - return convert_info_list - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before CodeGen's transformer block number.""" - return "transformer.h." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in CodeGen.""" - return cast(CodeGenConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in CodeGen.""" - return cast(CodeGenConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in CodeGen.""" - return cast(CodeGenConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in the codegen.""" - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head siez of CodeGen.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in codegen MLP.""" - return self.decoder_hidden_size * 4 - - @property - def rotary_dim(self) -> int: - """The rotary dim in CodeGen.""" - return cast(CodeGenConfig, self.config).rotary_dim - - @property - def rotary_emb_base(self) -> float: - """The rotary emb base in CodeGen.""" - return 10000.0 diff --git a/friendli/modules/converter/models/cohere.py b/friendli/modules/converter/models/cohere.py deleted file mode 100644 index 47217e23..00000000 --- a/friendli/modules/converter/models/cohere.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Cohere Checkpoint Converter.""" - - -from __future__ import annotations - -from typing import cast - -from transformers import CohereConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.modules.converter.base import FP8OnlyConverter -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface -from friendli.modules.converter.schema import ConvertInfo - - -class CohereForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface): - """CohereForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if LLaMA architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(CohereConfig, self.config).hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(CohereConfig, self.config).hidden_act}'", - valid_options=["silu"], - ) - if not cast(CohereConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=False'", - valid_options=[True], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - @property - def model_type(self) -> str: - """Model type.""" - return "cohere" - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before LLaMA's transformer block number.""" - return "model.layers." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in LLaMA.""" - return cast(CohereConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in LLaMA.""" - return cast(CohereConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in LLaMA.""" - return cast(CohereConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in LLaMA.""" - config = cast(CohereConfig, self.config) - if config.num_key_value_heads is None: - return self.decoder_num_attention_heads - return config.num_key_value_heads - - @property - def decoder_head_size(self) -> int: - """The head size of LLaMA.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in LLaMA MLP.""" - return self.config.intermediate_size - - @property - def rotary_dim(self) -> int: - """The rotary embedding dimension of LLaMA.""" - return self.decoder_head_size - - @property - def rotary_emb_base(self) -> float: - """The rotary embedding base of LLaMA.""" - return cast(CohereConfig, self.config).rope_theta diff --git a/friendli/modules/converter/models/dbrx.py b/friendli/modules/converter/models/dbrx.py deleted file mode 100644 index 88c9094f..00000000 --- a/friendli/modules/converter/models/dbrx.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Dbrx Checkpoint Converter.""" - - -from __future__ import annotations - -from typing import cast - -from transformers.models.dbrx.configuration_dbrx import ( # type: ignore[import] - DbrxConfig, - DbrxFFNConfig, -) - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.modules.converter.base import FP8OnlyConverter -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface - - -class DbrxForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface): - """DbrxForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Dbrx architectures' config can be converted to Friendli format.""" - super().check_config() - config = cast(DbrxConfig, self.config) - try: - if config.tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if config.ffn_config.moe_top_k not in [1, 2, 4]: - raise NotSupportedCheckpointError( - invalid_option=f"'moe_top_k={config.ffn_config.moe_top_k}'", - valid_options=[1, 2, 4], - ) - if config.ffn_config.moe_num_experts not in [1, 2, 4, 8, 16]: - raise NotSupportedCheckpointError( - invalid_option=f"'moe_num_experts={config.ffn_config.moe_num_experts}'", - valid_options=[1, 2, 4, 8, 16], - ) - - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - @property - def model_type(self) -> str: - """Model type.""" - return "dbrx" - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before LLaMA's transformer block number.""" - return "transformer.blocks." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in LLaMA.""" - return cast(DbrxConfig, self.config).n_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in LLaMA.""" - return cast(DbrxConfig, self.config).d_model - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in LLaMA.""" - return cast(DbrxConfig, self.config).n_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in LLaMA.""" - config = cast(DbrxConfig, self.config) - if config.attn_config.kv_n_heads is None: - return self.decoder_num_attention_heads - return config.attn_config.kv_n_heads - - @property - def decoder_head_size(self) -> int: - """The head size of LLaMA.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in LLaMA MLP.""" - return cast(DbrxConfig, self.config).ffn_config.ffn_hidden_size - - @property - def rotary_dim(self) -> int: - """The rotary embedding dimension of LLaMA.""" - return self.decoder_head_size - - @property - def rotary_emb_base(self) -> float: - """The rotary embedding base of LLaMA.""" - return cast(DbrxConfig, self.config).attn_config.rope_theta diff --git a/friendli/modules/converter/models/falcon.py b/friendli/modules/converter/models/falcon.py deleted file mode 100644 index 3dfdede2..00000000 --- a/friendli/modules/converter/models/falcon.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Falcon Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import torch -from transformers import FalconConfig # type: ignore[import] - -from friendli.errors import NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import DECODER_PREFIX, DecoderOnlyConverter -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import convert_to_gpt_j_params - - -class FalconForCausalLMConverter( - DecoderOnlyConverter, RotaryEmbeddingConversionInterface -): - """FalconForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Falcon architectures' config can be converted to Friendli format.""" - super().check_config() - config = cast(FalconConfig, self.config) - - if config.layer_norm_epsilon != 1e-5: - raise NotSupportedCheckpointError( - invalid_option=f"'layer_norm_epsilon={config.layer_norm_epsilon}'", - valid_options=[1e-5], - ) - - if config.alibi: - raise NotSupportedCheckpointError( - invalid_option=f"'alibi'={config.alibi}'", - valid_options=[False], - ) - - if not config.rotary: - raise NotSupportedCheckpointError( - invalid_option=f"'rotary'={config.rotary}'", - valid_options=[True], - ) - - if config.bias: - raise NotSupportedCheckpointError( - invalid_option=f"'bias'={config.bias}'", - valid_options=[False], - ) - - if not config.new_decoder_architecture and not config.parallel_attn: - raise NotSupportedCheckpointError( - invalid_option=( - f"'new_decoder_architecture'={config.new_decoder_architecture}" - f"'parallel_attn'={config.parallel_attn}" - ), - valid_options=[ - "'new_decoder_architecture'=True", - "'new_decoder_architecture'=False, 'parallel_attn'=True", - ], - ) - - def qkv_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """qkv_weight_reshape for Falcon's attention layer.""" - assert len(params) == 1 - qkv_weight = params[0] - - num_queries_per_kv = ( - self.decoder_num_attention_heads // self.decoder_num_kv_attention_heads - ) - - qkv_weight = qkv_weight.reshape( - self.decoder_num_kv_attention_heads, - num_queries_per_kv + 2, - self.decoder_head_size, - self.decoder_hidden_size, - ) - - q_weight = qkv_weight[:, :num_queries_per_kv].reshape( - self.decoder_num_kv_attention_heads * num_queries_per_kv, - self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = qkv_weight[:, [-2]].reshape( - self.decoder_num_kv_attention_heads, - self.decoder_head_size, - self.decoder_hidden_size, - ) - v_weight = qkv_weight[:, [-1]].reshape( - self.decoder_num_kv_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - - q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim) - k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim) - - q_weight = q_weight.reshape( - self.decoder_num_kv_attention_heads - * num_queries_per_kv - * self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = k_weight.reshape( - self.decoder_num_kv_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - - qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0) - qkv_weight = qkv_weight.transpose(0, 1) - - return qkv_weight - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(FalconConfig, self.config) - - logger.warn( - "The 'max_length' field is left blank as it cannot be 
automatically configured. " - "You must determine the 'max_length' according to your needs. The Falcon model does " - "not rely on absolute position embeddings, allowing you to choose any " - "suitable value." - ) - - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_kv_heads": self.decoder_num_kv_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": "FILL ME", - "vocab_size": config.vocab_size, - "eos_token": self.get_eos_token_id() or "FILL ME", - "rope_theta": self.rotary_emb_base, - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - if cast(FalconConfig, self.config).new_decoder_architecture: - return "falcon" - return "falcon-7b" - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in Falcon.""" - return [ - ConvertInfo( - param_names=["transformer.word_embeddings.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["lm_head.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ] - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in Falcon.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attention.query_key_value.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attention.dense.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_h_to_4h.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_4h_to_h.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - - if cast(FalconConfig, self.config).new_decoder_architecture: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ln_attn.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_attn.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_mlp.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_mlp.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ] - ) - else: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ] - ) - - return convert_info_list - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before the Falcon's transformer block number.""" - return "transformer.h." 
- - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in Falcon.""" - return cast(FalconConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in Falcon.""" - return cast(FalconConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in Falcon.""" - return cast(FalconConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in Falcon.""" - config = cast(FalconConfig, self.config) - - if config.new_decoder_architecture: - if config.num_kv_heads is not None: - return config.num_kv_heads - return config.num_attention_heads - - if config.multi_query: - return 1 - - if config.num_kv_heads is not None: - return config.num_kv_heads - return config.num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head size of Falcon.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in Falcon MLP.""" - return self.decoder_hidden_size * 4 - - @property - def rotary_dim(self) -> int: - """The rotary embedding dimension of Falcon.""" - return self.decoder_head_size - - @property - def rotary_emb_base(self) -> float: - """The rotary embedding base of Falcon.""" - return cast(FalconConfig, self.config).rope_theta diff --git a/friendli/modules/converter/models/gpt2.py b/friendli/modules/converter/models/gpt2.py deleted file mode 100644 index d2e2de5d..00000000 --- a/friendli/modules/converter/models/gpt2.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
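Both the Falcon and GPT-NeoX converters call `convert_to_gpt_j_params` to move rotary weights from NeoX-style interleaved ordering to GPT-J-style half-split ordering. The helper's implementation is not shown in this diff; the sketch below is one common way such a permutation is written and is meant only to illustrate the idea, not to reproduce the repository's helper.

```python
import torch

def interleaved_to_half_split(x: torch.Tensor, rotary_dim: int) -> torch.Tensor:
    """Reorder the rotary slice of a per-head weight from interleaved pairs
    (x0, x1, x2, x3, ...) to half-split ordering ((x0, x2, ...), (x1, x3, ...)).
    `x` has shape (num_heads, head_size, hidden); only the first rotary_dim rows
    of each head participate in RoPE."""
    rot, rest = x[:, :rotary_dim], x[:, rotary_dim:]
    rot = torch.cat([rot[:, 0::2], rot[:, 1::2]], dim=1)
    return torch.cat([rot, rest], dim=1)

w = torch.randn(4, 8, 32)                                  # 4 heads, head_size 8, hidden 32
print(interleaved_to_half_split(w, rotary_dim=8).shape)    # torch.Size([4, 8, 32])
```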
- -"""Friendli GPT2 Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import torch -from transformers import GPT2Config # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - SUPPORTED_GELU_FAMILY, - DecoderOnlyConverter, -) -from friendli.modules.converter.schema import ConvertInfo - - -class GPT2LMHeadModelConverter(DecoderOnlyConverter): - """GPT2LMHeadModel Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if GPT2 architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if ( - cast(GPT2Config, self.config).activation_function - not in SUPPORTED_GELU_FAMILY - ): - raise NotSupportedCheckpointError( - invalid_option="'activation_function=" - f"{cast(GPT2Config, self.config).activation_function}'", - valid_options=SUPPORTED_GELU_FAMILY, - ) - if cast(GPT2Config, self.config).scale_attn_by_inverse_layer_idx: - raise NotSupportedCheckpointError( - invalid_option="'scale_attn_by_inverse_layer_idx=True'", - valid_options=[False], - ) - if not cast(GPT2Config, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=False'", - valid_options=[True], - ) - if cast(GPT2Config, self.config).layer_norm_epsilon != 1e-5: - raise NotSupportedCheckpointError( - invalid_option="'layer_norm_epsilon=" - f"{cast(GPT2Config, self.config).layer_norm_epsilon}'", - valid_options=[1e-5], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(GPT2Config, self.config) - - logger.warn( - "Since GPT2 uses absolute position embedding, 'max_length' cannot be " - "larger than %d.", - config.n_positions, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "num_heads": self.decoder_num_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": config.n_positions, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "gpt" - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in GPT2.""" - return [ - ConvertInfo( - param_names=["transformer.wte.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.wpe.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/wpe/weight:0", - reshape_fn=self.pos_embed_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ] - - def linear_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """Reshape linear weight in GPT2, which does not need weight transpose.""" - assert len(params) == 1 - return 
params[0] - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in GPT2.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ln_1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_1.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.c_attn.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.c_proj.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_2.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_2.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.c_fc.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.c_proj.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.c_attn.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.c_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.c_fc.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.c_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - return convert_info_list - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before GPT2's transformer block number.""" - return "transformer.h." 
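The GPT2 converter's `linear_weight_reshape` above returns the weight unchanged because Hugging Face GPT-2 uses `Conv1D` modules, whose weights are already stored as `(in_features, out_features)`, the opposite of `nn.Linear`. A quick check, assuming a recent `transformers` release where `Conv1D` lives in `transformers.pytorch_utils`:

```python
from torch import nn
from transformers.pytorch_utils import Conv1D

linear = nn.Linear(4, 8)
conv1d = Conv1D(nf=8, nx=4)   # nf = out_features, nx = in_features

print(linear.weight.shape)    # torch.Size([8, 4])  -> would need a transpose
print(conv1d.weight.shape)    # torch.Size([4, 8])  -> already in the desired orientation
```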
- - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in GPT2.""" - return cast(GPT2Config, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in GPT2.""" - return cast(GPT2Config, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in GPT2.""" - return cast(GPT2Config, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in gpt2.""" - return self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in GPT2 MLP.""" - return self.decoder_hidden_size * 4 - - @property - def decoder_head_size(self) -> int: - """The head size of GPT2.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads diff --git a/friendli/modules/converter/models/gpt_neox.py b/friendli/modules/converter/models/gpt_neox.py deleted file mode 100644 index 47fe88a1..00000000 --- a/friendli/modules/converter/models/gpt_neox.py +++ /dev/null @@ -1,328 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli GPT NeoX Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import torch -from transformers import GPTNeoXConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - SUPPORTED_GELU_FAMILY, - DecoderOnlyConverter, -) -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import convert_to_gpt_j_params - - -class GPTNeoXForCausalLMConverter( - DecoderOnlyConverter, RotaryEmbeddingConversionInterface -): - """GPTNeoXForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if GPTNeoX architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(GPTNeoXConfig, self.config).hidden_act not in SUPPORTED_GELU_FAMILY: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(GPTNeoXConfig, self.config).hidden_act}'", - valid_options=SUPPORTED_GELU_FAMILY, - ) - if not cast(GPTNeoXConfig, self.config).use_parallel_residual: - raise NotSupportedCheckpointError( - invalid_option="'use_parallel_residual=False'", - valid_options=[True], - ) - if cast(GPTNeoXConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if cast(GPTNeoXConfig, self.config).layer_norm_eps != 1e-5: - raise NotSupportedCheckpointError( - invalid_option="'layer_norm_eps=" - f"{cast(GPTNeoXConfig, self.config).layer_norm_eps}'", - valid_options=[1e-5], - ) - if cast(GPTNeoXConfig, self.config).rotary_emb_base != 10000: - raise NotSupportedCheckpointError( - invalid_option=( - f"'rotary_emb_base={cast(GPTNeoXConfig, self.config).rotary_emb_base}'" - ), - valid_options=[10000], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def qkv_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """qkv_weight_reshape for GPTNeoX's attention layer.""" - assert len(params) == 1 - qkv_weight = params[0] -
qkv_weight = qkv_weight.reshape( - self.decoder_num_attention_heads, - 3, - self.decoder_head_size, - self.decoder_hidden_size, - ) - - q_weight = qkv_weight[:, 0].reshape( - self.decoder_num_attention_heads, - self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = qkv_weight[:, 1].reshape( - self.decoder_num_attention_heads, - self.decoder_head_size, - self.decoder_hidden_size, - ) - v_weight = qkv_weight[:, 2].reshape( - self.decoder_num_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - - q_weight = convert_to_gpt_j_params(param=q_weight, rotary_dim=self.rotary_dim) - k_weight = convert_to_gpt_j_params(param=k_weight, rotary_dim=self.rotary_dim) - q_weight = q_weight.reshape( - self.decoder_num_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = k_weight.reshape( - self.decoder_num_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - - qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0) - qkv_weight = qkv_weight.transpose(0, 1) - - return qkv_weight - - def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """qkv_bias_reshape for GPTNeoX's attention layer.""" - assert len(params) == 1 - qkv_bias = params[0] - qkv_bias = qkv_bias.reshape( - self.decoder_num_attention_heads, - 3, - self.decoder_head_size, - ) - - q_bias = qkv_bias[:, 0].reshape( - self.decoder_num_attention_heads, self.decoder_head_size - ) - k_bias = qkv_bias[:, 1].reshape( - self.decoder_num_attention_heads, self.decoder_head_size - ) - v_bias = qkv_bias[:, 2].reshape( - self.decoder_num_attention_heads * self.decoder_head_size - ) - - q_bias = convert_to_gpt_j_params(q_bias, self.rotary_dim).flatten() - k_bias = convert_to_gpt_j_params(k_bias, self.rotary_dim).flatten() - - qkv_bias = torch.cat((q_bias, k_bias, v_bias), dim=0) - return qkv_bias - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(GPTNeoXConfig, self.config) - - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. 
The GPTNeoX model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - config.max_position_embeddings, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": config.max_position_embeddings, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "rope_theta": self.rotary_emb_base, - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "gpt-neox" - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in GPTNeoX.""" - return [ - ConvertInfo( - param_names=["gpt_neox.embed_in.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["gpt_neox.final_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["gpt_neox.final_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["embed_out.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ] - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in GPTNeoX.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attention.query_key_value.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/bias:0", - reshape_fn=self.qkv_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attention.dense.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}post_attention_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}post_attention_layernorm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_h_to_4h.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_4h_to_h.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attention.query_key_value.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attention.dense.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_h_to_4h.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.dense_4h_to_h.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - return convert_info_list - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before GPTNeoX's transformer block number.""" - return "gpt_neox.layers." 
- - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in GPTNeoX.""" - return cast(GPTNeoXConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in GPTNeoX.""" - return cast(GPTNeoXConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in GPTNeoX.""" - return cast(GPTNeoXConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in gpt_neox.""" - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head size of GPTNeoX.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in GPTNeoX MLP.""" - return self.decoder_hidden_size * 4 - - @property - def rotary_dim(self) -> int: - """The rotary embedding dimension of GPTNeoX.""" - return int(self.decoder_head_size * cast(GPTNeoXConfig, self.config).rotary_pct) - - @property - def rotary_emb_base(self) -> float: - """The rotary embedding base of GPTNeoX.""" - return float(cast(GPTNeoXConfig, self.config).rotary_emb_base) diff --git a/friendli/modules/converter/models/gptj.py b/friendli/modules/converter/models/gptj.py deleted file mode 100644 index 7e0c464f..00000000 --- a/friendli/modules/converter/models/gptj.py +++ /dev/null @@ -1,430 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli GPTJ Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import torch -from transformers import GPTJConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - SUPPORTED_GELU_FAMILY, - DecoderOnlyConverter, - DecoderOnlyLoraConverter, -) -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface -from friendli.modules.converter.schema import ConvertInfo - - -class GPTJForCausalLMLoraConverter(DecoderOnlyLoraConverter): - """GPTJForCausalLM LoRA Converter Class.""" - - @property - def adapter_target_module_map(self) -> Dict[str, str]: - """Return the dictionary that maps Hugging Face's module name to Friendli's module name.""" - return { - "q_proj": "query", - "k_proj": "key", - "v_proj": "value", - "out_proj": "attn_fc", - "fc_in": "ff1", - "fc_out": "ff2", - "wte": "wte", - } - - @property - def adapter_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for LoRA adapter modules in GPTJ.""" - convert_info_list = [] - target_modules = self.adapter_target_modules - - # Non-transformer modules - if "wte" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=["transformer.wte.lora_embedding_A.default"], - data_type=self.converter.data_type, - converted_name="wte/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.wte.lora_embedding_B.default"], - data_type=self.converter.data_type, - converted_name="wte/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - # Transformer modules - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.converter.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - - if "query" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.q_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/query_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.q_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/query_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "key" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.k_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/key_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.k_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/key_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "value" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.v_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/value_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.v_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/value_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "attn_fc" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.out_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.out_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "ff1" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.fc_in.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.fc_in.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "ff2" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.fc_out.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.fc_out.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - return convert_info_list - - -class GPTJForCausalLMConverter( - DecoderOnlyConverter, RotaryEmbeddingConversionInterface -): - 
"""GPTJForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if GPTJ architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if ( - cast(GPTJConfig, self.config).activation_function - not in SUPPORTED_GELU_FAMILY - ): - raise NotSupportedCheckpointError( - invalid_option="'activation_function=" - f"{cast(GPTJConfig, self.config).activation_function}'", - valid_options=SUPPORTED_GELU_FAMILY, - ) - if cast(GPTJConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if cast(GPTJConfig, self.config).layer_norm_epsilon != 1e-5: - raise NotSupportedCheckpointError( - invalid_option="'layer_norm_epsilon=" - f"{cast(GPTJConfig, self.config).layer_norm_epsilon}'", - valid_options=[1e-5], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def qkv_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """qkv_weight_reshape for GPTJ's attention layer.""" - assert len(params) == 3 - qkv_weight = torch.cat( - params, - dim=0, - ) - qkv_weight = qkv_weight.transpose(0, 1) - return qkv_weight - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(GPTJConfig, self.config) - - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. The GPTJ model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - config.n_positions, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": config.n_positions, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "rope_theta": self.rotary_emb_base, - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "gpt-j" - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in GPTJ.""" - return [ - ConvertInfo( - param_names=["transformer.wte.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.ln_f.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["lm_head.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ConvertInfo( - param_names=["lm_head.bias"], - data_type=self.data_type, - converted_name="head_fc/bias:0", - reshape_fn=self.head_weight_reshape, - ), - ] - - @property - def decoder_convert_info_list(self) -> List[ConvertInfo]: - """The list of conversion informations for transformer modules in GPTJ.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ln_1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_1.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_in.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_out.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.q_proj.weight", - f"{layer_prefix}attn.k_proj.weight", - f"{layer_prefix}attn.v_proj.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_in.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc_out.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - - return convert_info_list - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before GPTJ's transformer module number.""" - return "transformer.h." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in GPTJ.""" - return cast(GPTJConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in GPTJ.""" - return cast(GPTJConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in GPTJ.""" - return cast(GPTJConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in gpt-j.""" - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head size of GPTJ.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in GPTJ MLP.""" - return self.decoder_hidden_size * 4 - - @property - def rotary_dim(self) -> int: - """The rotary dim in GPTJ.""" - return cast(GPTJConfig, self.config).rotary_dim - - @property - def rotary_emb_base(self) -> float: - """The rotary emb base in GPTJ.""" - return 10000.0 diff --git a/friendli/modules/converter/models/llama.py b/friendli/modules/converter/models/llama.py deleted file mode 100644 index 19381d5a..00000000 --- a/friendli/modules/converter/models/llama.py +++ /dev/null @@ -1,494 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli LLaMA Checkpoint Converter.""" - - -from __future__ import annotations - -from typing import Any, Dict, Iterable, List, Set, cast - -import torch -from transformers import LlamaConfig, LlamaForCausalLM # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - DecoderOnlyConverter, - DecoderOnlyLoraConverter, -) -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import convert_to_gpt_j_params - - -class LlamaForCausalLMLoraConverter(DecoderOnlyLoraConverter): - """LlamaForCausalLM LoRA Converter Class.""" - - def pre_convert( - self, - model: torch.nn.Module, - ) -> torch.nn.Module: - """Adjust the LoRA Adapter module's params in Llama before converting.""" - converter = cast(LlamaForCausalLMConverter, self.converter) - for layer in cast(LlamaForCausalLM, model).model.layers: - if "query" in self.adapter_target_modules: - query_b = layer.self_attn.q_proj.lora_B.default.weight - query_b = query_b.reshape( - converter.decoder_num_attention_heads, - converter.decoder_head_size, - -1, - ) - query_b = convert_to_gpt_j_params(query_b, converter.decoder_head_size) - query_b = query_b.reshape( - converter.decoder_num_attention_heads * converter.decoder_head_size, - -1, - ) - layer.self_attn.q_proj.lora_B.default.weight.data = query_b - - if "key" in self.adapter_target_modules: - key_b = layer.self_attn.k_proj.lora_B.default.weight - key_b = key_b.reshape( - converter.decoder_num_kv_attention_heads, - converter.decoder_head_size, - -1, - ) - key_b = convert_to_gpt_j_params(key_b, converter.decoder_head_size) - key_b = key_b.reshape( - converter.decoder_num_attention_heads * converter.decoder_head_size, - -1, - ) - layer.self_attn.k_proj.lora_B.default.weight.data = key_b - - return model - - @property - def adapter_target_module_map(self) -> Dict[str, str]: - """Return the dictionary that maps Hugging Face's module name to Friendli's module name.""" - return { - "q_proj": "query", - "k_proj": "key", - "v_proj": "value", - "o_proj": "attn_fc", - "up_proj": "ff1", - "gate_proj": "ff_gate", - "down_proj": "ff2", - "embed_tokens": "wte", - } - - @property - def adapter_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for LoRA adapter modules in Llama.""" - convert_info_list = [] - target_modules = self.adapter_target_modules - - # Non-transformer modules - if "wte" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=["model.embed_tokens.lora_embedding_A.default"], - data_type=self.converter.data_type, - converted_name="wte/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=["model.embed_tokens.lora_embedding_B.default"], - data_type=self.converter.data_type, - converted_name="wte/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - # Transformer modules - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.converter.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - assert self.adapter_config.target_modules is not None - - if "query" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/query_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/query_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "key" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.k_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/key_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.k_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/key_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "value" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.v_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/value_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.v_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/value_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "attn_fc" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.o_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.o_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "ff1" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.up_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.up_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "ff_gate" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.gate_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_gate/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.gate_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_gate/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "ff2" in target_modules: - 
convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.down_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.down_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - return convert_info_list - - -class LlamaForCausalLMConverter( - DecoderOnlyConverter, RotaryEmbeddingConversionInterface -): - """LlamaForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if LLaMA architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(LlamaConfig, self.config).hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(LlamaConfig, self.config).hidden_act}'", - valid_options=["silu"], - ) - if cast(LlamaConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if cast(LlamaConfig, self.config).rms_norm_eps not in (1e-5, 1e-6): - raise NotSupportedCheckpointError( - invalid_option=f"'rms_norm_eps={cast(LlamaConfig, self.config).rms_norm_eps}'", - valid_options=[1e-5, 1e-6], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def qkv_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """qkv_weight_reshape for LLaMA's attention layer.""" - assert len(params) == 3 - q_weight = params[0] - k_weight = params[1] - v_weight = params[2] - - q_weight = q_weight.reshape( - self.decoder_num_attention_heads, - self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = k_weight.reshape( - self.decoder_num_kv_attention_heads, - self.decoder_head_size, - self.decoder_hidden_size, - ) - q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim) - k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim) - q_weight = q_weight.reshape( - self.decoder_num_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = k_weight.reshape( - self.decoder_num_kv_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - - qkv_weight = torch.cat([q_weight, k_weight, v_weight], dim=0) - qkv_weight = qkv_weight.transpose(0, -1) - return qkv_weight - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(LlamaConfig, self.config) - - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. 
The Llama model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - config.max_position_embeddings, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_kv_heads": self.decoder_num_kv_attention_heads, - "num_layers": self.decoder_layer_num, - "ff_intermediate_size": self.decoder_ff_intermediate_size, - "max_length": config.max_position_embeddings, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "rope_theta": self.rotary_emb_base, - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "llama" - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in LLaMA.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.weight", - f"{layer_prefix}self_attn.k_proj.weight", - f"{layer_prefix}self_attn.v_proj.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.o_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}post_attention_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.gate_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_gate/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.up_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.down_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - return convert_info_list - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in LLaMA.""" - return [ - ConvertInfo( - param_names=["model.embed_tokens.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["model.norm.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["lm_head.weight"], - data_type=self.data_type, - converted_name=f"head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ] - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before LLaMA's transformer block 
number.""" - return "model.layers." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in LLaMA.""" - return cast(LlamaConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in LLaMA.""" - return cast(LlamaConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in LLaMA.""" - return cast(LlamaConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in LLaMA.""" - config = cast(LlamaConfig, self.config) - if config.num_key_value_heads is None: - return self.decoder_num_attention_heads - return config.num_key_value_heads - - @property - def decoder_head_size(self) -> int: - """The head size of LLaMA.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in LLaMA MLP.""" - return self.config.intermediate_size - - @property - def rotary_dim(self) -> int: - """The rotary embedding dimension of LLaMA.""" - return self.decoder_head_size - - @property - def rotary_emb_base(self) -> float: - """The rotary embedding base of LLaMA.""" - return cast(LlamaConfig, self.config).rope_theta diff --git a/friendli/modules/converter/models/mistral.py b/friendli/modules/converter/models/mistral.py deleted file mode 100644 index bfc9e75b..00000000 --- a/friendli/modules/converter/models/mistral.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Mistral Checkpoint Converter.""" - - -from __future__ import annotations - -from typing import Any, Dict, cast - -from transformers import MistralConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.models.llama import ( - LlamaForCausalLMConverter, - LlamaForCausalLMLoraConverter, -) - - -class MistralForCausalLMLoraConverter(LlamaForCausalLMLoraConverter): - """MistralForCausalLM LoRA Converter Class.""" - - -class MistralForCausalLMConverter(LlamaForCausalLMConverter): - """MistralForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Mistral architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(MistralConfig, self.config).hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(MistralConfig, self.config).hidden_act}'", - valid_options=["silu"], - ) - if cast(MistralConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - - if cast(MistralConfig, self.config).rms_norm_eps not in (1e-5, 1e-6): - raise NotSupportedCheckpointError( - invalid_option=f"'rms_norm_eps={cast(MistralConfig, self.config).rms_norm_eps}'", - valid_options=[1e-5, 1e-6], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(MistralConfig, self.config) - - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. 
The Mistral model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - config.max_position_embeddings, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_kv_heads": self.decoder_num_kv_attention_heads, - "num_layers": self.decoder_layer_num, - "ff_intermediate_size": self.decoder_ff_intermediate_size, - "max_length": config.max_position_embeddings, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "attention_window_size": self.attention_window_size, # for sliding window, - "rope_theta": self.rotary_emb_base, - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "mistral" - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in Mistral.""" - return cast(MistralConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in Mistral.""" - return cast(MistralConfig, self.config).hidden_size - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of linear layer in Mistral MLP.""" - return cast(MistralConfig, self.config).intermediate_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in Mistral.""" - return cast(MistralConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in Mistral.""" - config = cast(MistralConfig, self.config) - if config.num_key_value_heads is None: - return self.decoder_num_attention_heads - return config.num_key_value_heads - - @property - def attention_window_size(self) -> int: - """The size of sliding window attention in Mistral.""" - return cast(MistralConfig, self.config).sliding_window diff --git a/friendli/modules/converter/models/mixtral.py b/friendli/modules/converter/models/mixtral.py deleted file mode 100644 index 5cf5a366..00000000 --- a/friendli/modules/converter/models/mixtral.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Mixtral Checkpoint Converter.""" - - -from __future__ import annotations - -from typing import Any, Dict, List, Optional, cast - -from transformers import MixtralConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.models.llama import LlamaForCausalLMConverter -from friendli.modules.converter.schema import ConvertInfo - - -class MixtralForCausalLMConverter(LlamaForCausalLMConverter): - """MixtralForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Mixtral architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(MixtralConfig, self.config).hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(MixtralConfig, self.config).hidden_act}'", - valid_options=["silu"], - ) - if cast(MixtralConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if cast(MixtralConfig, self.config).num_local_experts != 8: - raise NotSupportedCheckpointError( - invalid_option=f"'num_local_experts={cast(MixtralConfig, self.config).num_local_experts}", - valid_options=[8], - ) - if cast(MixtralConfig, self.config).num_experts_per_tok != 2: - raise NotSupportedCheckpointError( - invalid_option=f"'num_experts_per_tok={cast(MixtralConfig, self.config).num_experts_per_tok}", - valid_options=[2], - ) - - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(MixtralConfig, self.config) - - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. The Mixtral model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - config.max_position_embeddings, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_kv_heads": self.decoder_num_kv_attention_heads, - "num_layers": self.decoder_layer_num, - "ff_intermediate_size": self.decoder_ff_intermediate_size, - "max_length": config.max_position_embeddings, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "rope_theta": self.rotary_emb_base, - "num_experts": self.num_experts, - } - if isinstance(self.attention_window_size, int): - # for sliding window - attr["attention_window_size"] = self.attention_window_size - return attr - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in LLaMA.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}input_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.weight", - f"{layer_prefix}self_attn.k_proj.weight", - f"{layer_prefix}self_attn.v_proj.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.o_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}post_attention_layernorm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}block_sparse_moe.gate.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}moe/router/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - for i in range(self.num_experts): - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}block_sparse_moe.experts.{i}.w1.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}moe/{i}/mlp/c_gate/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}block_sparse_moe.experts.{i}.w2.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}moe/{i}/mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}block_sparse_moe.experts.{i}.w3.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}moe/{i}/mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - return convert_info_list - - @property - def model_type(self) -> str: - """Model type.""" - return "mixtral" - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in Mixtral.""" - return cast(MixtralConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in Mixtral.""" - return cast(MixtralConfig, self.config).hidden_size - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of linear layer in Mixtral MoEs.""" - return cast(MixtralConfig, self.config).intermediate_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in Mixtral.""" - return cast(MixtralConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in Mixtral.""" - config = cast(MixtralConfig, self.config) - if config.num_key_value_heads is None: - return self.decoder_num_attention_heads - return config.num_key_value_heads - - @property - def attention_window_size(self) -> Optional[int]: - """The size of sliding window attention in Mixtral.""" - return cast(MixtralConfig, self.config).sliding_window - - @property - def num_experts(self) -> int: - """The number of moe experts per transformer block in Mixtral.""" - return cast(MixtralConfig, self.config).num_local_experts - - @property - def num_selected_moe_experts(self) -> int: - """The number of selected moe experts per transformer block in Mixtral.""" - return 
cast(MixtralConfig, self.config).num_experts_per_tok diff --git a/friendli/modules/converter/models/mpt.py b/friendli/modules/converter/models/mpt.py deleted file mode 100644 index 48c332b6..00000000 --- a/friendli/modules/converter/models/mpt.py +++ /dev/null @@ -1,397 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli MPT Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -from transformers import ( # type: ignore[import] - GenerationConfig, - MptConfig, - PretrainedConfig, -) - -from friendli.enums import ModelDataType # type: ignore[import] -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - DecoderOnlyConverter, - DecoderOnlyLoraConverter, -) -from friendli.modules.converter.schema import ConvertInfo - - -def safe_attn_config_get(attn_config: Dict[str, Any], key: str) -> Any: - """Safe getter from MptAttentionConfig. - - This function is a temporary function because MptAttentionConfig - is not supported `attn_type="grouped_query_attention"` yet. - """ - if key not in attn_config: - raise CheckpointConversionError( - f"{key} does not exist in MptAttentionConfig {attn_config}" - ) - - return attn_config[key] - - -class MptForCausalLMLoraConverter(DecoderOnlyLoraConverter): - """MptForCausalLM LoRA Converter Class.""" - - @property - def adapter_target_module_map(self) -> Dict[str, str]: - """Return the dictionary that maps Hugging Face's module name to Friendli's module name.""" - return { - "Wqkv": "merged-qkv", - "out_proj": "attn_fc", - "up_proj": "ff1", - "down_proj": "ff2", - "wte": "wte", - } - - @property - def adapter_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for LoRA adapter modules in Mpt.""" - convert_info_list = [] - target_modules = self.adapter_target_modules - - # Non-transformer modules - if "wte" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=["transformer.wte.lora_embedding_A.default"], - data_type=self.converter.data_type, - converted_name="wte/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.wte.lora_embedding_B.default"], - data_type=self.converter.data_type, - converted_name="wte/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - # Transformer modules - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.converter.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - - if "merged-qkv" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.Wqkv.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.Wqkv.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "attn_fc" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.out_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}attn.out_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "ff1" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.up_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.up_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - if "ff2" in target_modules: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.down_proj.lora_A.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_A/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}mlp.down_proj.lora_B.default.weight" - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_B/weight:0", - reshape_fn=self.lora_weight_reshape, - ), - ] - ) - - return convert_info_list - - -class MPTForCausalLMConverter(DecoderOnlyConverter): - """MPTForCausalLM Architectures Converter Class.""" - - def __init__( - self, - config: PretrainedConfig, - generation_config: GenerationConfig | None, - data_type: ModelDataType, - ) -> None: - """Initialize MPTForCausalLMConverter.""" - super().__init__(config, generation_config, data_type) - attn_config = cast(MptConfig, config).attn_config - if isinstance(attn_config, PretrainedConfig): - attn_config = attn_config.to_dict() # type: ignore - self.attn_config = attn_config - - def check_config(self) -> None: - """Check if MPT architectures' config can be converted to Friendli format.""" - super().check_config() - - if not safe_attn_config_get(self.attn_config, "alibi"): - raise NotSupportedCheckpointError( - invalid_option=f"'alibi={safe_attn_config_get(self.attn_config, 'alibi')}'", - valid_options=[True], - ) - - if safe_attn_config_get(self.attn_config, "alibi_bias_max") != 8: - raise NotSupportedCheckpointError( - invalid_option=f"'alibi={safe_attn_config_get(self.attn_config, 'alibi_bias_max')}'", - valid_options=[8], - ) - - if safe_attn_config_get(self.attn_config, "attn_type") != "multihead_attention": - if ( - 
safe_attn_config_get(self.attn_config, "attn_type") - == "grouped_query_attention" - ): - raise CheckpointConversionError( - msg="MptAttentionConfig does not support `attn_type=`grouped_query_attention`` yet (as of transformers==4.35.2).", - ) - raise NotSupportedCheckpointError( - invalid_option=f"'attn_type={safe_attn_config_get(self.attn_config, 'attn_type')}'", - valid_options=["multihead_attention"], - ) - - if safe_attn_config_get(self.attn_config, "prefix_lm"): - raise NotSupportedCheckpointError( - invalid_option=f"'prefix_lm={safe_attn_config_get(self.attn_config, 'prefix_lm')}'", - valid_options=[False], - ) - - if safe_attn_config_get(self.attn_config, "qk_ln"): - raise NotSupportedCheckpointError( - invalid_option=f"'qk_ln={safe_attn_config_get(self.attn_config, 'qk_ln')}'", - valid_options=[False], - ) - - if safe_attn_config_get(self.attn_config, "softmax_scale") is not None: - raise NotSupportedCheckpointError( - invalid_option=f"'softmax_scale={safe_attn_config_get(self.attn_config, 'softmax_scale')}'", - valid_options=[None], - ) - - if cast(MptConfig, self.config).expansion_ratio != 4: - raise NotSupportedCheckpointError( - invalid_option=( - f"'expansion_ratio={cast(MptConfig, self.config).expansion_ratio}'" - ), - valid_options=[4], - ) - - if not cast(MptConfig, self.config).no_bias: - raise NotSupportedCheckpointError( - invalid_option=f"'no_bias={cast(MptConfig, self.config).no_bias}'", - valid_options=[True], - ) - - if cast(MptConfig, self.config).logit_scale is not None: - raise NotSupportedCheckpointError( - invalid_option=( - f"'logit_scale={cast(MptConfig, self.config).logit_scale}'" - ), - valid_options=[None], - ) - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in MPT.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}norm_1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}norm_2.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.Wqkv.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ffn.up_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ffn.down_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - - return convert_info_list - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in MPT.""" - return [ - ConvertInfo( - param_names=["transformer.wte.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["transformer.norm_f.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ] - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. The MPT model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - cast(MptConfig, self.config).max_seq_len, - ) - - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "num_heads": self.decoder_num_attention_heads, - "num_kv_heads": self.decoder_num_kv_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": cast(MptConfig, self.config).max_seq_len, - "vocab_size": cast(MptConfig, self.config).vocab_size, - "clip_qkv": safe_attn_config_get(self.attn_config, "clip_qkv") or 0.0, - "eos_token": self.get_eos_token_id() or "FILL ME", - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "mpt" - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before the MPT's transformer block number.""" - return "transformer.blocks." 
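# NOTE (editor): an illustrative sketch, not part of the original mpt.py, of how the
# ConvertInfo entries built above are consumed. For each entry the listed Hugging Face
# parameter names are looked up in the checkpoint state_dict, merged/reshaped by reshape_fn,
# and written out under converted_name (e.g. f"{DECODER_PREFIX}/h_._0/attn/c_attn/weight:0").
# The actual driver loop lives in the converter base class, which is not shown in this diff;
# the `state_dict` and `saver` arguments and the simplified dtype handling here are assumptions.
import torch

def apply_convert_info_list(convert_info_list, state_dict, saver, torch_dtype=torch.float16):
    for info in convert_info_list:
        # Gather the source tensors named by this entry (e.g. separate q/k/v projections).
        params = [state_dict[name] for name in info.param_names]
        # Reshape them into the Friendli layout; the real code maps info.data_type
        # (a ModelDataType) to a torch dtype instead of taking the dtype as an argument.
        converted = info.reshape_fn(params).to(torch_dtype)
        saver.save_tensor(info.converted_name, converted)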
- - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in MPT.""" - return cast(MptConfig, self.config).n_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in MPT.""" - return cast(MptConfig, self.config).d_model - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in MPT.""" - return cast(MptConfig, self.config).n_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in MPT.""" - if "kv_n_heads" in self.attn_config: - return self.attn_config["kv_n_heads"] - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head size of MPT.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in MPT MLP.""" - return self.decoder_hidden_size * 4 diff --git a/friendli/modules/converter/models/opt.py b/friendli/modules/converter/models/opt.py deleted file mode 100644 index 6d8ad8aa..00000000 --- a/friendli/modules/converter/models/opt.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli OPT Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import torch -from transformers import OPTConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import DECODER_PREFIX, DecoderOnlyConverter -from friendli.modules.converter.schema import ConvertInfo - - -class OPTForCausalLMConverter(DecoderOnlyConverter): - """OPTForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if OPT architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(OPTConfig, self.config).activation_function not in ["relu"]: - raise NotSupportedCheckpointError( - invalid_option="'activation_function=" - f"{cast(OPTConfig, self.config).activation_function}'", - valid_options=["relu"], - ) - if not cast(OPTConfig, self.config).do_layer_norm_before is True: - raise NotSupportedCheckpointError( - invalid_option=f"'do_layer_norm_before={False}'", - valid_options=[True], - ) - if ( - cast(OPTConfig, self.config).word_embed_proj_dim - != cast(OPTConfig, self.config).hidden_size - ): - raise NotSupportedCheckpointError( - invalid_option="'word_embed_proj_dim" - f"({cast(OPTConfig, self.config).word_embed_proj_dim}) " - f"!= hidden_size({cast(OPTConfig, self.config).hidden_size})'", - valid_options=[ - f"'word_embed_proj_dim({cast(OPTConfig, self.config).hidden_size}) " - f"== hidden_size({cast(OPTConfig, self.config).hidden_size})'" - ], - ) - if cast( # pylint: disable=protected-access - OPTConfig, self.config - )._remove_final_layer_norm: - raise NotSupportedCheckpointError( - invalid_option=f"'_remove_final_layer_norm={True}'", - valid_options=[False], - ) - if not cast(OPTConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option=f"'tie_word_embeddings={False}'", - valid_options=[True], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def pos_embed_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """Positional embedding weight convert for OPT's decoder.""" - assert 
len(params) == 1 - pos_emb = params[0] - pos_emb = pos_emb[2:, :] # offset pos emb - - return pos_emb - - def qkv_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """qkv_weight_reshape for OPT's attention layer.""" - qkv_weight = torch.cat( - params, - dim=0, - ) - qkv_weight = qkv_weight.transpose(0, 1) - return qkv_weight - - def qkv_bias_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """qkv_bias_reshape for OPT's attention layer.""" - qkv_bias = torch.cat( - params, - dim=0, - ) - return qkv_bias - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(OPTConfig, self.config) - - logger.warn( - "Since OPT uses absolute position embedding, 'max_length' cannot be " - "larger than %d.", - config.max_position_embeddings, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "num_heads": self.decoder_num_attention_heads, - "num_layers": self.decoder_layer_num, - "max_length": config.max_position_embeddings, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "opt" - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in OPT.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}self_attn_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.bias", - f"{layer_prefix}self_attn.k_proj.bias", - f"{layer_prefix}self_attn.v_proj.bias", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/bias:0", - reshape_fn=self.qkv_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.out_proj.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}final_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}final_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc1.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc2.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}fc2.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - 
param_names=[f"{layer_prefix}fc1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}self_attn.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}self_attn.q_proj.weight", - f"{layer_prefix}self_attn.k_proj.weight", - f"{layer_prefix}self_attn.v_proj.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ] - ) - return convert_info_list - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in OPT.""" - return [ - ConvertInfo( - param_names=["model.decoder.embed_tokens.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["model.decoder.embed_positions.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/wpe/weight:0", - reshape_fn=self.pos_embed_weight_reshape, - ), - ConvertInfo( - param_names=["model.decoder.final_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["model.decoder.final_layer_norm.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["lm_head.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ] - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before OPT's transformer block number.""" - return "model.decoder.layers." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in OPT.""" - return cast(OPTConfig, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in OPT.""" - return cast(OPTConfig, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in OPT.""" - return cast(OPTConfig, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in opt.""" - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head size of OPT.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in codegen OPT.""" - return self.decoder_hidden_size * 4 diff --git a/friendli/modules/converter/models/phi3.py b/friendli/modules/converter/models/phi3.py deleted file mode 100644 index 3f05b8bd..00000000 --- a/friendli/modules/converter/models/phi3.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Cohere Checkpoint Converter.""" - - -from __future__ import annotations - -from typing import cast - -from transformers.models.phi3 import Phi3Config # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.modules.converter.base import FP8OnlyConverter -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface - - -class Phi3ForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface): - """Phi3ForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if phi3 architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if cast(Phi3Config, self.config).hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(Phi3Config, self.config).hidden_act}'", - valid_options=["silu"], - ) - if cast(Phi3Config, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - @property - def model_type(self) -> str: - """Model type.""" - return "phi3" - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before phi3's transformer block number.""" - return "model.layers." - - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in phi3.""" - return cast(Phi3Config, self.config).num_hidden_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in phi3.""" - return cast(Phi3Config, self.config).hidden_size - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in phi3.""" - return cast(Phi3Config, self.config).num_attention_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in phi3.""" - config = cast(Phi3Config, self.config) - if config.num_key_value_heads is None: - return self.decoder_num_attention_heads - return config.num_key_value_heads - - @property - def decoder_head_size(self) -> int: - """The head size of phi3.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in phi3 MLP.""" - return self.config.intermediate_size - - @property - def rotary_dim(self) -> int: - """The rotary embedding dimension of phi3.""" - return self.decoder_head_size - - @property - def rotary_emb_base(self) -> float: - """The rotary embedding base of phi3.""" - return cast(Phi3Config, self.config).rope_theta diff --git a/friendli/modules/converter/models/phi_msft.py b/friendli/modules/converter/models/phi_msft.py deleted file mode 100644 index 493c4402..00000000 --- a/friendli/modules/converter/models/phi_msft.py +++ /dev/null @@ -1,369 +0,0 @@ -# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Phi Checkpoint Converter.""" - - -from __future__ import annotations - -import math -from typing import Any, Dict, List, Optional, cast - -import torch -from transformers import PretrainedConfig # type: ignore[import] - -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - SUPPORTED_GELU_FAMILY, - DecoderOnlyConverter, -) -from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import convert_to_gpt_j_params - - -class PhiMsftConfig(PretrainedConfig): - """Phi msft configuration. Different from the HuggingFace PhiConfig.""" - - model_type = "phi" - attribute_map = { - "max_position_embeddings": "n_positions", - "hidden_size": "n_embd", - "num_attention_heads": "n_head", - "num_hidden_layers": "n_layer", - } - - def __init__( - self, - vocab_size: int = 50304, - n_positions: int = 2048, - n_embd: int = 1024, - n_layer: int = 20, - n_inner: Optional[int] = None, - n_head: int = 16, - n_head_kv: Optional[int] = None, - rotary_dim: Optional[int] = 32, - activation_function: Optional[str] = "gelu_new", - flash_attn: bool = False, - flash_rotary: bool = False, - fused_dense: bool = False, - attn_pdrop: float = 0.0, - embd_pdrop: float = 0.0, - resid_pdrop: float = 0.0, - layer_norm_epsilon: float = 1e-5, - initializer_range: float = 0.02, - tie_word_embeddings: bool = False, - pad_vocab_size_multiple: int = 64, - **kwargs, - ) -> None: - """Initalize the configuration for a phi-msft model.""" - self.vocab_size = int( - math.ceil(vocab_size / pad_vocab_size_multiple) * pad_vocab_size_multiple - ) - self.n_positions = n_positions - self.n_embd = n_embd - self.n_layer = n_layer - self.n_inner = n_inner - self.n_head = n_head - self.n_head_kv = n_head_kv - self.rotary_dim = min(rotary_dim, n_embd // n_head) # type: ignore[type-var] - self.activation_function = activation_function - self.flash_attn = flash_attn - self.flash_rotary = flash_rotary - self.fused_dense = fused_dense - self.attn_pdrop = attn_pdrop - self.embd_pdrop = embd_pdrop - self.resid_pdrop = resid_pdrop - self.layer_norm_epsilon = layer_norm_epsilon - self.initializer_range = initializer_range - - super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs) - - -class PhiForCausalLMConverter(DecoderOnlyConverter): - """PhiForCausalLM Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if Phi architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if ( - cast(PhiMsftConfig, self.config).activation_function - not in SUPPORTED_GELU_FAMILY - ): - raise NotSupportedCheckpointError( - invalid_option="'activation_function=" - f"{cast(PhiMsftConfig, self.config).activation_function}'", - valid_options=SUPPORTED_GELU_FAMILY, - ) - if cast(PhiMsftConfig, self.config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def qkv_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """qkv_weight_reshape for Phi's attention layer.""" - assert len(params) == 1 - qkv_weight = params[0] - - q_size = self.decoder_num_attention_heads * self.decoder_head_size - kv_size = self.decoder_num_kv_attention_heads * 
self.decoder_head_size - q_weight, k_weight, v_weight = torch.split( - qkv_weight, [q_size, kv_size, kv_size], dim=0 - ) - - q_weight = q_weight.reshape( - self.decoder_num_attention_heads, - self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = k_weight.reshape( - self.decoder_num_kv_attention_heads, - self.decoder_head_size, - self.decoder_hidden_size, - ) - - q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim) - k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim) - - q_weight = q_weight.reshape( - self.decoder_num_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - k_weight = k_weight.reshape( - self.decoder_num_kv_attention_heads * self.decoder_head_size, - self.decoder_hidden_size, - ) - - qkv_weight = torch.cat([q_weight, k_weight, v_weight], dim=0) - qkv_weight = qkv_weight.transpose(0, -1) - return qkv_weight - - def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor: - """qkv_bias_reshape for Phi's attention layer.""" - assert len(params) == 1 - qkv_bias = params[0] - - q_size = self.decoder_num_attention_heads * self.decoder_head_size - kv_size = self.decoder_num_kv_attention_heads * self.decoder_head_size - - q_bias, k_bias, v_bias = torch.split( - qkv_bias, [q_size, kv_size, kv_size], dim=0 - ) - - q_bias = q_bias.reshape( - self.decoder_num_attention_heads, self.decoder_head_size - ) - k_bias = k_bias.reshape( - self.decoder_num_kv_attention_heads, self.decoder_head_size - ) - - q_bias = convert_to_gpt_j_params(q_bias, self.rotary_dim).flatten() - k_bias = convert_to_gpt_j_params(k_bias, self.rotary_dim).flatten() - - qkv_bias = torch.cat((q_bias, k_bias, v_bias), dim=0) - return qkv_bias - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(PhiMsftConfig, self.config) - - logger.info( - "The generated attributes set 'max_length' to %d, but you can change the " - "'max_length' according to your needs. The Phi model does not rely on " - "absolute position embeddings, allowing you to choose any suitable value.", - config.n_positions, - ) - - eos_token_id = self.get_eos_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.decoder_head_size, - "rotary_dim": self.rotary_dim, - "num_heads": self.decoder_num_attention_heads, - "num_kv_heads": self.decoder_num_kv_attention_heads, - "num_layers": self.decoder_layer_num, - "ff_intermediate_size": self.decoder_ff_intermediate_size, - "max_length": config.n_positions, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "rope_theta": self.rotary_emb_base, - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - return "phi" - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in Phi.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ln.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}ln_1/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc1.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc1.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc2.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.fc2.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mixer.Wqkv.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mixer.Wqkv.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_attn/bias:0", - reshape_fn=self.qkv_bias_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mixer.out_proj.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mixer.out_proj.bias"], - data_type=self.data_type, - converted_name=f"{converted_prefix}attn/c_proj/bias:0", - reshape_fn=self.linear_bias_reshape, - ), - ] - ) - return convert_info_list - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for non-transformer blocks in Phi.""" - return [ - ConvertInfo( - param_names=["transformer.embd.wte.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=["lm_head.ln.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["lm_head.ln.bias"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/beta:0", - reshape_fn=self.ln_bias_reshape, - ), - ConvertInfo( - param_names=["lm_head.linear.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ), - ConvertInfo( - param_names=["lm_head.linear.bias"], - data_type=self.data_type, - converted_name="head_fc/bias:0", - reshape_fn=self.head_weight_reshape, - ), - ] - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before Phi's transformer module number.""" - return "transformer.h." 
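# NOTE (editor): a small illustrative sketch, not part of the original phi_msft.py, of the
# head-channel reordering that qkv_weight_reshape above delegates to convert_to_gpt_j_params
# (defined later in this diff, in converter/utils.py). Within the first `rotary_dim` channels
# of each head, the two halves are interleaved so the channel order becomes
# (0, rotary_dim/2, 1, rotary_dim/2 + 1, ...), i.e. the GPT-J rotary layout. The tensor below
# is a toy example.
import torch

head = torch.arange(8).view(1, 8, 1)  # one head, head_size=8, trailing dim of 1
rotary_dim = 4
rot, rest = head[:, :rotary_dim], head[:, rotary_dim:]
rot = torch.stack((rot[:, :rotary_dim // 2], rot[:, rotary_dim // 2:]), dim=2).reshape(1, rotary_dim, 1)
print(torch.cat((rot, rest), dim=1).flatten().tolist())  # [0, 2, 1, 3, 4, 5, 6, 7]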
- - @property - def decoder_layer_num(self) -> int: - """The number of decoder layers in Phi.""" - return cast(PhiMsftConfig, self.config).n_layer - - @property - def decoder_hidden_size(self) -> int: - """The hidden size in Phi.""" - return cast(PhiMsftConfig, self.config).n_embd - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads in Phi.""" - return cast(PhiMsftConfig, self.config).n_head - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads in Phi.""" - config = cast(PhiMsftConfig, self.config) - if config.n_head_kv is not None: - return config.n_head_kv - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head size of Phi.""" - return self.decoder_hidden_size // self.decoder_num_attention_heads - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate size of the linear layer in codegen MLP.""" - config = cast(PhiMsftConfig, self.config) - if config.n_inner is None: - return self.decoder_hidden_size * 4 - return config.n_inner - - @property - def rotary_dim(self) -> int: - """The rotary dim in Phi.""" - return cast(PhiMsftConfig, self.config).rotary_dim # type: ignore[return-value] - - @property - def rotary_emb_base(self) -> float: - """The rotary emb base in Phi.""" - return 10000.0 diff --git a/friendli/modules/converter/models/t5.py b/friendli/modules/converter/models/t5.py deleted file mode 100644 index ba188bd2..00000000 --- a/friendli/modules/converter/models/t5.py +++ /dev/null @@ -1,444 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli T5 Checkpoint Converter.""" - -from __future__ import annotations - -from typing import Any, Dict, List, cast - -import torch -from transformers import T5Config # type: ignore[import] - -from friendli.enums import ModelDataType -from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError -from friendli.logging import logger -from friendli.modules.converter.base import ( - DECODER_PREFIX, - ENCODER_PREFIX, - EncoderDecoderConverter, -) -from friendli.modules.converter.schema import ConvertInfo - - -class T5Converter(EncoderDecoderConverter): - """T5ForConditionalGeneration Architectures Converter Class.""" - - def check_config(self) -> None: - """Check if T5 architectures' config can be converted to Friendli format.""" - super().check_config() - try: - if not ( - cast(T5Config, self.config).is_gated_act - ^ cast(T5Config, self.config).tie_word_embeddings - ): - raise NotSupportedCheckpointError( - invalid_option=f"'is_gated_act={cast(T5Config, self.config).is_gated_act}'and " - f"'tie_word_embeddings={cast(T5Config, self.config).tie_word_embeddings}'", - valid_options=[ - "'is_gated_act' and 'tie_word_embeddings' should be different." 
- ], - ) - - if cast(T5Config, self.config).layer_norm_epsilon != 1e-6: - raise NotSupportedCheckpointError( - invalid_option="'layer_norm_epsilon=" - f"{cast(T5Config, self.config).layer_norm_epsilon}'", - valid_options=[1e-6], - ) - except AttributeError as exc: - raise CheckpointConversionError(str(exc)) from exc - - def _decoder_final_ln_weight_reshape( - self, params: List[torch.Tensor] - ) -> torch.Tensor: - """Special handle for T5.""" - assert len(params) == 1 - param = params[0] - - if cast(T5Config, self.config).tie_word_embeddings: - param = param * (cast(T5Config, self.config).d_model ** -0.5) - - return param - - def pos_embed_weight_reshape( - self, - params: List[torch.Tensor], - ) -> torch.Tensor: - """Reshape positional embedding weights in T5.""" - assert len(params) == 1 - return params[0] - - def get_attributes(self) -> Dict[str, Any]: - """Get checkpoint attributes.""" - config = cast(T5Config, self.config) - - logger.warn( - "The 'max_input_length' and 'max_output_length' fields are left blank as " - "they cannot be automatically configured. " - "Determine the 'max_input_length' and 'max_output_length' according to your " - "needs. The T5 model does not rely on absolute position embeddings, " - "allowing you to choose any suitable value." - ) - - eos_token_id = self.get_eos_token_id() - decoder_start_token_id = self.get_decoder_start_token_id() - attr = { - "model_type": self.model_type, - "dtype": self.data_type.value, - "head_size": self.encoder_head_size, - "num_heads": self.encoder_num_attention_heads, - "hidden_size": self.encoder_hidden_size, - "ff_intermediate_size": self.decoder_ff_intermediate_size, - "num_encoder_layers": self.encoder_layer_num, - "num_decoder_layers": self.decoder_layer_num, - "max_input_length": "FILL ME", - "max_output_length": "FILL ME", - "num_pos_emb_buckets": config.relative_attention_num_buckets, - "max_pos_distance": config.relative_attention_max_distance, - "vocab_size": config.vocab_size, - "eos_token": eos_token_id if eos_token_id is not None else "FILL ME", - "decoder_start_token": ( - decoder_start_token_id - if decoder_start_token_id is not None - else "FILL ME" - ), - } - return attr - - @property - def model_type(self) -> str: - """Model type.""" - if cast(T5Config, self.config).is_gated_act: - return "t5-v1_1" - return "t5" - - @property - def encoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in T5's encoder.""" - convert_info_list = [] - for i in range(self.encoder_layer_num): - layer_prefix = f"{self.encoder_layer_prefix}{i}." 
- converted_prefixe = f"{ENCODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}layer.0.layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}layer.1.layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.0.SelfAttention.q.weight", - f"{layer_prefix}layer.0.SelfAttention.k.weight", - f"{layer_prefix}layer.0.SelfAttention.v.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}layer.0.SelfAttention.o.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - - if cast(T5Config, self.config).is_gated_act: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.1.DenseReluDense.wi_0.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_gate/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.1.DenseReluDense.wi_1.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.1.DenseReluDense.wo.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - else: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.1.DenseReluDense.wi.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.1.DenseReluDense.wo.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - - return convert_info_list - - @property - def decoder_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The list of conversion informations for transformer blocks in T5's decoder.""" - convert_info_list = [] - for i in range(self.decoder_layer_num): - layer_prefix = f"{self.decoder_layer_prefix}{i}." 
- converted_prefixe = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}layer.0.layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}ln_1/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}layer.1.layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}ln_2/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}layer.2.layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}ln_3/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.0.SelfAttention.q.weight", - f"{layer_prefix}layer.0.SelfAttention.k.weight", - f"{layer_prefix}layer.0.SelfAttention.v.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}layer.0.SelfAttention.o.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.1.EncDecAttention.q.weight", - f"{layer_prefix}layer.1.EncDecAttention.k.weight", - f"{layer_prefix}layer.1.EncDecAttention.v.weight", - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}cross_attn/c_attn/weight:0", - reshape_fn=self.qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}layer.1.EncDecAttention.o.weight"], - data_type=self.data_type, - converted_name=f"{converted_prefixe}cross_attn/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - - if cast(T5Config, self.config).is_gated_act: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.2.DenseReluDense.wi_0.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_gate/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.2.DenseReluDense.wi_1.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.2.DenseReluDense.wo.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - else: - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.2.DenseReluDense.wi.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_fc/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}layer.2.DenseReluDense.wo.weight" - ], - data_type=self.data_type, - converted_name=f"{converted_prefixe}mlp/c_proj/weight:0", - reshape_fn=self.linear_weight_reshape, - ), - ] - ) - - return convert_info_list - - @property - def non_transformer_convert_info_list( - self, - ) -> List[ConvertInfo]: - """The convert_info_list for non-transformer blocks in T5.""" - convert_info_list = [ - ConvertInfo( - param_names=[f"shared.weight"], - data_type=self.data_type, - converted_name="wte/weight:0", - reshape_fn=self.token_embed_weight_reshape, - ), - ConvertInfo( - param_names=[ - "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight" - ], - data_type=ModelDataType.FP32, - 
converted_name=f"{ENCODER_PREFIX}/wpe/weight:0", - reshape_fn=self.pos_embed_weight_reshape, - ), - ConvertInfo( - param_names=[ - "decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight" - ], - data_type=ModelDataType.FP32, - converted_name=f"{DECODER_PREFIX}/wpe/weight:0", - reshape_fn=self.pos_embed_weight_reshape, - ), - ConvertInfo( - param_names=["encoder.final_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{ENCODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self.ln_weight_reshape, - ), - ConvertInfo( - param_names=["decoder.final_layer_norm.weight"], - data_type=self.data_type, - converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0", - reshape_fn=self._decoder_final_ln_weight_reshape, - ), - ] - - if not cast(T5Config, self.config).tie_word_embeddings: - convert_info_list.append( - ConvertInfo( - param_names=["lm_head.weight"], - data_type=self.data_type, - converted_name="head_fc/weight:0", - reshape_fn=self.head_weight_reshape, - ) - ) - - return convert_info_list - - @property - def encoder_layer_prefix(self) -> str: - """The layer name prefix used before T5 encoder's transformer block number.""" - return "encoder.block." - - @property - def decoder_layer_prefix(self) -> str: - """The layer name prefix used before T5 decoder's transformer block number.""" - return "decoder.block." - - @property - def encoder_layer_num(self) -> int: - """The number of transformer blocks in T5 encoder.""" - return cast(T5Config, self.config).num_layers - - @property - def encoder_hidden_size(self) -> int: - """The hidden size of T5 encoder.""" - return cast(T5Config, self.config).d_model - - @property - def encoder_num_attention_heads(self) -> int: - """The number of attention heads of T5 encoder.""" - return cast(T5Config, self.config).num_heads - - @property - def encoder_head_size(self) -> int: - """The head size of T5 encoder.""" - return cast(T5Config, self.config).d_kv - - @property - def encoder_ff_intermediate_size(self) -> int: - """The intermediate of the linear layer in T5 encoder's MLP.""" - return cast(T5Config, self.config).d_ff - - @property - def decoder_layer_num(self) -> int: - """The number of transformer blocks in T5 decoder.""" - return cast(T5Config, self.config).num_decoder_layers - - @property - def decoder_hidden_size(self) -> int: - """The hidden size of T5 decoder.""" - return cast(T5Config, self.config).d_model - - @property - def decoder_num_attention_heads(self) -> int: - """The number of attention heads of T5 decoder.""" - return cast(T5Config, self.config).num_heads - - @property - def decoder_num_kv_attention_heads(self) -> int: - """The number of key-value attention heads of t5 decoder.""" - return self.decoder_num_attention_heads - - @property - def decoder_head_size(self) -> int: - """The head size of T5 decoder.""" - return cast(T5Config, self.config).d_kv - - @property - def decoder_ff_intermediate_size(self) -> int: - """The intermediate of the linear layer in T5 decoder's MLP.""" - return cast(T5Config, self.config).d_ff diff --git a/friendli/modules/converter/saver.py b/friendli/modules/converter/saver.py deleted file mode 100644 index e9d6d2ae..00000000 --- a/friendli/modules/converter/saver.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved. 
- -"""Savers to save a converted checkpoints into various file types.""" - -from __future__ import annotations - -import json -import os -from abc import abstractmethod -from contextlib import AbstractContextManager -from typing import Dict, Generic, List, TypeVar, Union - -import h5py # type: ignore[import] -import numpy as np -import safetensors.numpy # type: ignore[import] -import safetensors.torch # type: ignore[import] -import torch -from typing_extensions import Self - -from friendli.enums import CheckpointFileType -from friendli.errors import CheckpointConversionError -from friendli.logging import logger - - -def get_saver( - ckpt_file_type: CheckpointFileType, output_dir: str, output_file_name: str -) -> CheckpointSaver: - """Create a saver that corresponds to the file type.""" - if ckpt_file_type == CheckpointFileType.HDF5: - return HDF5Saver(output_dir, output_file_name) - if ckpt_file_type == CheckpointFileType.SAFETENSORS: - return SafetensorsSaver(output_dir, output_file_name) - raise CheckpointConversionError( - f"Output file type {ckpt_file_type} is not supported." - ) - - -class CheckpointSaver(AbstractContextManager): - """Abstract for savers.""" - - def __init__( - self, output_dir: Union[str, os.PathLike], output_file_name: str - ) -> None: - """Check that the output file already exists.""" - super().__init__() - self._output_dir = output_dir - self._output_file_name = output_file_name - - @abstractmethod - def save_tensor(self, tensor_id: str, t: Union[np.ndarray, torch.Tensor]) -> None: - """Save the tensor in the file.""" - raise NotImplementedError - - @abstractmethod - def close(self) -> None: - """Close the output checkpoint file.""" - raise NotImplementedError - - def __enter__(self) -> Self: - """Enter for context manager.""" - return self - - def __exit__(self, *exc) -> None: - """Exit for context manager.""" - self.close() - - -class HDF5Saver(CheckpointSaver): - """Saver for HDF5.""" - - def __init__(self, output_dir: str, output_file_name: str) -> None: - """Create a HDF5 file.""" - super().__init__(output_dir, output_file_name) - self._out_f = h5py.File(os.path.join(output_dir, output_file_name), "w") - - def save_tensor(self, tensor_id: str, t: Union[np.ndarray, torch.Tensor]) -> None: - """Create a group if not exists, and save the tensor in the file.""" - assert isinstance(t, np.ndarray) - self._out_f[tensor_id] = t - - def close(self) -> None: - """Close the HDF5 file.""" - self._out_f.close() - - -T = TypeVar("T") - - -class SafetensorsSaverInterface(Generic[T]): - """Interface for saving safetensor format.""" - - def get_weight_size(self, tensor: T) -> int: - """Get total weight size in `Byte` unit.""" - raise NotImplementedError - - def save_file(self, tensor: Dict[str, T], path: str) -> None: - """Save given tensor to path.""" - raise NotImplementedError - - -class TorchSafetensorsSaverInterface(SafetensorsSaverInterface[torch.Tensor]): - """Interface for saving safetensor format.""" - - def get_weight_size(self, tensor: torch.Tensor) -> int: - """Get total weight size in `Byte` unit.""" - return tensor.itemsize * tensor.numel() - - def save_file(self, tensor: Dict[str, torch.Tensor], path: str) -> None: - """Save given tensor to path.""" - safetensors.torch.save_file(tensor, path) - - -class NumpySafetensorsSaverInterface(SafetensorsSaverInterface[np.ndarray]): - """Interface for saving safetensor format.""" - - def get_weight_size(self, tensor: np.ndarray) -> int: - """Get total weight size in `Byte` unit.""" - return tensor.itemsize * tensor.size 
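# NOTE (editor): an illustrative usage sketch, not part of the original saver.py, showing how
# the saver classes above were meant to be driven. get_saver() returns an HDF5Saver or a
# SafetensorsSaver depending on the CheckpointFileType, and both are context managers, so a
# converter can stream tensors through save_tensor() and rely on close() running on exit
# (SafetensorsSaver buffers everything and writes sharded files when closed). The output
# path and tensor values below are placeholders.
import numpy as np

from friendli.enums import CheckpointFileType
from friendli.modules.converter.saver import get_saver  # module removed by this diff

with get_saver(CheckpointFileType.SAFETENSORS, "/tmp/converted", "model.safetensors") as saver:
    saver.save_tensor("wte/weight:0", np.zeros((8, 4), dtype=np.float16))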
- - def save_file(self, tensor: Dict[str, np.ndarray], path: str) -> None: - """Save given tensor to path.""" - safetensors.numpy.save_file(tensor, path) - - -class UnionSafetensorsSaverInterface( - SafetensorsSaverInterface[Union[torch.Tensor, np.ndarray]] -): - """Interface for saving safetensor format.""" - - def __init__(self) -> None: - """Initialize UnionSafetensorsSaverInterface.""" - self._sub_itfcs = { - np.ndarray: NumpySafetensorsSaverInterface(), - torch.Tensor: TorchSafetensorsSaverInterface(), - } - super().__init__() - - def get_weight_size(self, tensor: Union[torch.Tensor, np.ndarray]) -> int: - """Get total weight size in `Byte` unit.""" - return self._sub_itfcs[type(tensor)].get_weight_size(tensor) # type: ignore[attr-defined] - - def save_file( - self, tensor: Dict[str, Union[torch.Tensor, np.ndarray]], path: str - ) -> None: - """Save given tensor to path.""" - if len(tensor) == 0: - logger.warn("No tensor to save. Skip saving tensors..") - return - # NOTE: Assume that all tensors are the same type - tensor_type = type(next(iter(tensor.values()))) - itfc = self._sub_itfcs[tensor_type] - itfc.save_file(tensor, path) # type: ignore[attr-defined] - - -class SafetensorsSaver(CheckpointSaver): - """Saver for Safetensors. - - This temporally saves the converted tensors in local memory. - Then, all of the tensors are saved in the file at a time when close() is called, - because Safetensors does not support stream saving. - """ - - def __init__( - self, output_dir: Union[str, os.PathLike], output_file_name: str - ) -> None: - """Initialize a saver.""" - super().__init__(output_dir, output_file_name) - self._tensors: Dict[str, Union[np.ndarray, torch.Tensor]] = {} - self._saver: UnionSafetensorsSaverInterface = UnionSafetensorsSaverInterface() - - def save_tensor(self, tensor_id: str, t: Union[np.ndarray, torch.Tensor]) -> None: - """Save the tensor in the local memory.""" - self._tensors[tensor_id] = t - - def shard_checkpoint(self, max_shard_size: str): - """Shard the checkpoint with index.""" - # pylint: disable=too-many-locals - int_max_shard_size = int(max_shard_size[:-2]) * (10**9) - sharded_tensors: List[Dict[str, Union[np.ndarray, torch.Tensor]]] = [{}] - last_block_size = 0 - total_size = 0 - - for key, weight in self._tensors.items(): - weight_size = self._saver.get_weight_size(weight) - if ( - last_block_size + weight_size > int_max_shard_size - and len(sharded_tensors[-1]) > 0 - ): - sharded_tensors.append({}) - last_block_size = 0 - - sharded_tensors[-1][key] = weight - last_block_size += weight_size - total_size += weight_size - - if len(sharded_tensors) == 1: - return {self._output_file_name: sharded_tensors[0]}, None - - weight_map = {} - shards = {} - for idx, shard in enumerate(sharded_tensors): - shard_file = self._output_file_name.replace( - ".safetensors", - f"-{idx + 1:05d}-of-{len(sharded_tensors):05d}.safetensors", - ) - shards[shard_file] = shard - for key in shard.keys(): - weight_map[key] = shard_file - - metadata = {"total_size": total_size} - index = {"metadata": metadata, "weight_map": weight_map} - return shards, index - - def _save_to_file(self) -> None: - """Save the tensors in the file.""" - logger.info("Saving the converted checkpoint...") - - max_shard_size = "10GB" - shards, index = self.shard_checkpoint(max_shard_size) - - for shard_file, shard in shards.items(): - self._saver.save_file(shard, os.path.join(self._output_dir, shard_file)) - - if index is None: - path_to_weights = os.path.join(self._output_dir, self._output_file_name) - 
logger.info("Model weights saved in (%s)", path_to_weights) - else: - save_index_file = os.path.join( - self._output_dir, "model.safetensors.index.json" - ) - # Save the index as well - with open(save_index_file, "w", encoding="utf-8") as f: - content = json.dumps(index, indent=2, sort_keys=True) + "\n" - f.write(content) - logger.info( - "The model is bigger than the maximum size per checkpoint %s " - " and is going to be split in %s checkpoint shards. You can find " - "where each parameters has been saved in the index located at (%s).", - max_shard_size, - str(len(shards)), - save_index_file, - ) - - def close(self) -> None: - """Save the tensors in the file.""" - self._save_to_file() diff --git a/friendli/modules/converter/schema.py b/friendli/modules/converter/schema.py deleted file mode 100644 index 21f034b1..00000000 --- a/friendli/modules/converter/schema.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Converter Schema.""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Callable, List - -import torch - -from friendli.enums import ModelDataType - - -@dataclass -class ConvertInfo: - """Dataclass for convert information of the parameter in huggingface checkpoint. - - Args: - param_names(List[str]): List of parameter names in the huggingface checkpoint. - data_type(ModelDataType): Data type of the parameter. - converted_name(str): Name of the converted parameter. - reshape_fn(Callable[[List[torch.tensor]], np.ndarray]): - Function to reshape the tensor from the huggignface checkpoint. - """ - - param_names: List[str] - data_type: ModelDataType - converted_name: str - reshape_fn: Callable[[List[torch.Tensor]], torch.Tensor] diff --git a/friendli/modules/converter/utils.py b/friendli/modules/converter/utils.py deleted file mode 100644 index 4b9588f4..00000000 --- a/friendli/modules/converter/utils.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Converter Utils.""" - -from __future__ import annotations - -import os -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union, cast - -import torch -from peft import PeftConfig # type: ignore[import] # pylint: disable=import-error -from transformers import ( # type: ignore[import] - AutoConfig, - AutoTokenizer, - GenerationConfig, - PretrainedConfig, - PreTrainedTokenizer, -) - -from friendli.enums import ModelDataType -from friendli.errors import ( - CheckpointConversionError, - NotFoundError, - NotSupportedCheckpointError, - TokenizerNotFoundError, -) - - -def convert_to_gpt_j_params(param: torch.Tensor, rotary_dim: int) -> torch.Tensor: - """Reshape weight or bias tensor with rotary embedding to gpt-j format. - - Args: - param (torch.Tensor): Target tensor to convert. Shape must be (num_heads, head_size, ...) - rotary_dim (int): Degree of rotary embedding - - Returns: - Torch tensor that heads are rotated. - - Raises: - CheckpointConversionError: If arguments do not satisfy the requirements. 
- - """ - if param.ndim < 2: - raise CheckpointConversionError( - "Tensor dimension should be greater or equal than 2 for rotary conversion, " - f"but got {param.ndim}" - ) - - head_size = param.shape[1] - if rotary_dim > head_size: - raise CheckpointConversionError( - f"'rotary_dim' ({rotary_dim}) should be less or equal than 'head_size' ({head_size})" - ) - - param_rot = param[:, :rotary_dim] - param_pass = param[:, rotary_dim:] - - origin_shape = param_rot.shape - param_rot_1 = param_rot[:, : rotary_dim // 2] - param_rot_2 = param_rot[:, rotary_dim // 2 :] - param_rot = torch.stack((param_rot_1, param_rot_2), dim=2).reshape(*origin_shape) - - return torch.cat((param_rot, param_pass), dim=1) - - -def get_tensor_from_state_dict( - state_dict: Dict[str, Any], tensor_name: str -) -> torch.Tensor: - """Get the tensor whose name is 'tensor_name' from 'state_dict'. - - Args: - state_dict (Dict[str, Any]): Model checkpoint's state_dict. - tensor_name (str): Name of tensor to get. - - Returns: - Corresponding torch Tensor. - - Raises: - CheckpointConversionError: If 'tensor_name' does not exist in 'state_dict' - - """ - if tensor_name not in state_dict: - raise CheckpointConversionError( - f"Cannot find '{tensor_name}' in the model checkpoint" - ) - - return state_dict[tensor_name] - - -def get_torch_data_type(data_type: str) -> torch.dtype: - """Get torch data type from Enum.""" - if data_type == ModelDataType.FP16: - return torch.float16 - if data_type == ModelDataType.FP32: - return torch.float32 - if data_type == ModelDataType.BF16: - return torch.bfloat16 - raise CheckpointConversionError( - f"Can't not converted original param to {data_type}." - ) - - -def get_model_data_type(torch_dtype: torch.dtype) -> ModelDataType: - """Get torch data type from Enum.""" - if torch_dtype == torch.float16: - return ModelDataType.FP16 - if torch_dtype == torch.float32: - return ModelDataType.FP32 - if torch_dtype == torch.bfloat16: - return ModelDataType.BF16 - raise CheckpointConversionError(f"{torch_dtype} is not valid dtype.") - - -def convert_tensor_dtype( - param: torch.Tensor, - data_type: Union[ModelDataType, torch.dtype], -) -> torch.Tensor: - """Convert tensor format to the given data type. - - Args: - param (torch.Tensor): The tensor to be converted. - data_type (ModelDataType): The data type of the tensor. - - Returns: - torch.Tensor: The converted tensor. 
- - """ - dtype_map = { - ModelDataType.FP8_E4M3: torch.float8_e4m3fn, - ModelDataType.BF16: torch.bfloat16, - ModelDataType.FP16: torch.float16, - ModelDataType.FP32: torch.float32, - ModelDataType.INT4: torch.int8, - ModelDataType.INT8: torch.int8, - } - - dtype = dtype_map[data_type] if isinstance(data_type, ModelDataType) else data_type - - if dtype is torch.float8_e4m3fn: - return param.detach().to(dtype).view(dtype=torch.int8).to("cpu") - - if dtype is torch.bfloat16: - return param.detach().to(dtype).to("cpu") - - if data_type is ModelDataType.INT4: - pack_num = 8 // 4 - int4_param = torch.zeros( - (param.shape[0], param.shape[1] // pack_num), - dtype=torch.uint8, - device=param.device, - ) - for col in range(int4_param.shape[1]): - for i in range(pack_num): - int4_param[:, col] |= param[:, col * pack_num + i] << (i * 4) - param = int4_param - - return param.detach().to(dtype).to("cpu") - - -def get_tokenizer( - model_name_or_path: str, - *, - cache_dir: Optional[str] = None, -) -> PreTrainedTokenizer: - """Try to get tokenizer of a pretrained model.""" - try: - tokenizer = AutoTokenizer.from_pretrained( - model_name_or_path, - cache_dir=cache_dir, - trust_remote_code=True, - ) - except OSError as exc: - raise TokenizerNotFoundError(str(exc)) from exc - - if not tokenizer.is_fast: - raise TokenizerNotFoundError( - "This model does not support Friendli-compatible tokenizer" - ) - - if tokenizer.pad_token != "": - tokenizer.pad_token = tokenizer.eos_token - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - - return tokenizer - - -def save_tokenizer( - model_name_or_path: str, - *, - cache_dir: Optional[str] = None, - save_dir: str, -) -> Tuple[str, ...]: - """Try to save `tokenizer.json` of a pretrained model.""" - if not os.path.isdir(save_dir): - raise NotFoundError(f"Directory '{save_dir}' is not found.") - - tokenizer = get_tokenizer(model_name_or_path, cache_dir=cache_dir) - saved_file_paths = tokenizer.save_pretrained(save_directory=save_dir) - tokenizer_json_path = None - for path in saved_file_paths: - if "tokenizer.json" == os.path.basename(path): - tokenizer_json_path = path - break - - if tokenizer_json_path is None: - raise TokenizerNotFoundError( - "This model has the Friendli-compatible tokenizer implementation, but " - "'tokenizer.json' file is not found." - ) - return saved_file_paths - - -def get_model_generation_config( - model_name_or_path: str, cache_dir: Optional[str] = None -) -> Optional[GenerationConfig]: - """Get HuggingFace model generation config.""" - try: - generation_config = GenerationConfig.from_pretrained( - model_name_or_path, cache_dir=cache_dir, trust_remote_code=True - ) - except (OSError, TypeError): - generation_config = None - - return generation_config - - -def get_model_pretrained_config( - model_name_or_path: str, model_output_path: str, cache_dir: Optional[str] = None -) -> PretrainedConfig: - """Get HuggingFace model configs.""" - try: - config = AutoConfig.from_pretrained( - model_name_or_path, cache_dir=cache_dir, trust_remote_code=True - ) - except OSError as exc: # from AutoConfig.from_pretrained() - config_dir = Path(model_name_or_path) - model_output_dir = Path(model_output_path).parent - if config_dir.exists() and model_output_dir.absolute() == config_dir.absolute(): - raise NotFoundError( - f"'output_dir' ({model_output_dir.as_posix()}) and " - f"'model_name_or_path' ({model_name_or_path}) are the same. " - "In such a case, checkpoints should be prepared in 'output_dir'." 
- ) from exc - raise NotFoundError(str(exc)) from exc - - return config - - -def get_model_arch(config: PretrainedConfig) -> str: - """Get HuggingFace model architecture from config.""" - model_arch_list = cast(List[str], cast(PretrainedConfig, config).architectures) - if len(model_arch_list) == 0: - raise NotSupportedCheckpointError( - invalid_option=f"'architectures={model_arch_list}'", - valid_options=["non empty list of architectures"], - ) - model_arch = model_arch_list[0] - return model_arch - - -def get_adapter_config( - adapter_name_or_path: str, cache_dir: Optional[str] -) -> PeftConfig: - """Get PeftConfig for Adapter.""" - try: - adapter_config = PeftConfig.from_pretrained( - adapter_name_or_path, cache_dir=cache_dir, trust_remote_code=True - ) - except ValueError as exc: - raise NotFoundError(str(exc)) from exc - return adapter_config diff --git a/friendli/modules/quantizer/__init__.py b/friendli/modules/quantizer/__init__.py deleted file mode 100644 index 9d1a3117..00000000 --- a/friendli/modules/quantizer/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli model quantizer.""" diff --git a/friendli/modules/quantizer/awq/__init__.py b/friendli/modules/quantizer/awq/__init__.py deleted file mode 100644 index 50a1020d..00000000 --- a/friendli/modules/quantizer/awq/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model AWQ Quantizer.""" diff --git a/friendli/modules/quantizer/awq/base.py b/friendli/modules/quantizer/awq/base.py deleted file mode 100644 index 172d214c..00000000 --- a/friendli/modules/quantizer/awq/base.py +++ /dev/null @@ -1,513 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli AWQ Quantizer Base.""" - -from __future__ import annotations - -import gc -from abc import abstractmethod -from dataclasses import fields -from typing import Any, Dict, Iterator, List, Tuple, Type, cast - -import datasets # type: ignore[import] -import torch -from datasets.utils.logging import disable_progress_bar # type: ignore[import] -from tqdm import tqdm - -from friendli.enums import ModelDataType -from friendli.errors import QuantizationError -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import get_tokenizer -from friendli.modules.quantizer.awq.utils import ( - apply_module_clip, - apply_module_scale, - search_module_clip, - search_module_scale, -) -from friendli.modules.quantizer.base import AbstractQuantHook, CommonQuantizer -from friendli.modules.quantizer.layers import WeightOnlyQuantizedLinearLayer -from friendli.modules.quantizer.schema.config import AWQConfig -from friendli.modules.quantizer.schema.data import ( - ModuleName, - QuantInput, - TFQuantInputs, - TFQuantResults, - WeightOnlyQuantResult, -) -from friendli.modules.quantizer.utils import ( - collect_inps, - get_weight_only_quant_scales, - quantized_linear_weight_reshape, - quantized_qkv_weight_reshape, - safe_load_datasets, - scale_reshape, -) - - -class AWQScaler(torch.nn.Module): - """Store AWQ scale before linear layers. - - If the linear layer is quantized, but the previous layer can't be scaled, - then we need to store the AWQ scale in a separate module. This module - is used to store the AWQ scale. 
- """ - - def __init__(self, in_dim: int): - """Initialize AWQScaler.""" - super().__init__() - self.scale = torch.nn.Parameter(torch.ones(in_dim)) - - def forward(self, x): - """Scale input by AWQ scale.""" - return (x / self.scale.view(1, 1, -1)).to(x.dtype) - - -class AWQHook(AbstractQuantHook): - """Quantization Hook for AWQ.""" - - @abstractmethod - def iter_inspect_modules( - self, - block: torch.nn.Module, - ) -> Iterator[ - Tuple[ - List[torch.nn.Module], - List[Tuple[ModuleName, torch.nn.Linear]], - torch.nn.Module, - ModuleName, - ] - ]: - """Returns iterator of modules to inspect for AWQ scale.""" - - @abstractmethod - def add_pre_scaler( - self, - model: torch.nn.Module, - ) -> torch.nn.Module: - """Add scaler for storing AWQ scale in modules.""" - - @abstractmethod - def get_inspect_module_types( - self, block: torch.nn.Module - ) -> Tuple[Type[torch.nn.Module], ...]: - """Returns the type of inspect modules in transformer block.""" - - def _register_pre_scaler( - self, - linear: torch.nn.Module, - ) -> AWQScaler: - """Register pre-scaler for storing AWQ scale in modules.""" - scaler = AWQScaler(linear.in_features) # type: ignore - - def pre_scaler_hook(_, x: Tuple[Any, ...]) -> Tuple[torch.Tensor, ...]: - return (scaler(x[0]),) - - linear.register_forward_pre_hook(pre_scaler_hook) - return scaler - - def get_quant_result( - self, - quant_inputs: TFQuantInputs, - **kwargs: Any, - ) -> TFQuantResults: - """Get quantization result for AWQ.""" - awq_config = cast(AWQConfig, self.quant_config) - - def get_scale( - quant_input: QuantInput, - ) -> WeightOnlyQuantResult: - weight, name, start, end = ( - quant_input.weight, - quant_input.name, - quant_input.start_offset, - quant_input.end_offset, - ) - weight = weight.to(awq_config.device) - - return get_weight_only_quant_scales( - layer_name=name, - w=weight[start:end], - q_bit=awq_config.awq_args.quant_bit, - q_group_size=awq_config.awq_args.quant_group_size, - ) - - return TFQuantResults( - layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_inputs.layer_index}.", - block=quant_inputs.block, - q=get_scale(quant_inputs.q), - k=get_scale(quant_inputs.k), - v=get_scale(quant_inputs.v), - attn_fc=get_scale(quant_inputs.attn_fc), - ff1=get_scale(quant_inputs.ff1), - ff2=get_scale(quant_inputs.ff2), - ) - - @property - def quant_dtype(self) -> ModelDataType: - """Return the quantization dtype.""" - quant_config = cast(AWQConfig, self.quant_config) - awq_args = quant_config.awq_args - if awq_args.quant_bit == 4: - return ModelDataType.INT4 - return ModelDataType.INT8 - - @property - @abstractmethod - def avoid_clipping_layer_names(self) -> List[str]: - """Return the layer names to avoid clipping.""" - - @property - @abstractmethod - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified layers.""" - - @property - def quantized_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for quantized layers.""" - convert_info_list = [] - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[ - f"{layer_prefix}q.weight_scale", - f"{layer_prefix}k.weight_scale", - f"{layer_prefix}v.weight_scale", - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/awq/scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}q.zeros", - f"{layer_prefix}k.zeros", - f"{layer_prefix}v.zeros", - ], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_attn/awq/zero:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}q.weight", - f"{layer_prefix}k.weight", - f"{layer_prefix}v.weight", - ], - data_type=self.quant_dtype, - converted_name=f"{converted_prefix}attn/c_attn/awq/weight:0", - reshape_fn=quantized_qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc.weight_scale"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/awq/scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc.zeros"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}attn/c_proj/awq/zero:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc.weight"], - data_type=self.quant_dtype, - converted_name=f"{converted_prefix}attn/c_proj/awq/weight:0", - reshape_fn=quantized_linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff1.weight_scale"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/awq/scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff1.zeros"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_fc/awq/zero:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff1.weight"], - data_type=self.quant_dtype, - converted_name=f"{converted_prefix}mlp/c_fc/awq/weight:0", - reshape_fn=quantized_linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff2.weight_scale"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/awq/scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff2.zeros"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_proj/awq/zero:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff2.weight"], - data_type=self.quant_dtype, - converted_name=f"{converted_prefix}mlp/c_proj/awq/weight:0", - reshape_fn=quantized_linear_weight_reshape, - ), - ] - ) - return convert_info_list - - -class AWQQuantizer(CommonQuantizer): - """Quantizer for AWQ.""" - - def check_config(self) -> None: - """Check if the AWQ quantization config is valid.""" - super().check_config() - quant_config = cast(AWQConfig, self.quant_config) - awq_args = quant_config.awq_args - if awq_args.quant_bit not in [4, 8]: - raise QuantizationError( - f"Invalid quant_bit {awq_args.quant_bit} for AWQ." - "You can only use 4 or 8 bit for AWQ." - ) - if awq_args.quant_group_size not in [64]: - raise QuantizationError( - f"Invalid quant_group_size {awq_args.quant_group_size} for AWQ." - "You can only use 64 for AWQ." 
- ) - - def get_calib_dataset(self) -> datasets.Dataset: - """Get calibration dataset for AWQ.""" - data_cfg = self.quant_config.calibration_dataset - tokenizer = get_tokenizer(self.converter.config.name_or_path) - dataset = safe_load_datasets(data_cfg) - - def preprocess(sample) -> Dict[str, Any]: - """Preprocess dataset for AWQ.""" - return {"input_ids": tokenizer(sample).input_ids} - - disable_progress_bar() - dataset = ( - dataset.shuffle(self.quant_config.seed) - .select(range(data_cfg.num_samples)) - .map(function=preprocess, input_columns=data_cfg.lookup_column_name) - .filter( - lambda sample: torch.tensor(sample).numel() != 0, - input_columns="input_ids", - ) - ) - - return dataset - - def get_batched_samples(self): - """Get batched samples from dataset.""" - dataset = self.get_calib_dataset() - seqlen = self.quant_config.calibration_dataset.max_length - samples = [] - for sample in dataset["input_ids"]: - samples.append(torch.tensor(sample[:seqlen])) - - batched_samples = torch.cat(samples) - if len(batched_samples) // seqlen == 0: - return batched_samples.unsqueeze(0) - - batched_samples = [ - batched_samples[i * seqlen : (i + 1) * seqlen].unsqueeze(0) - for i in range(len(batched_samples) // seqlen) - ] - batched_samples = torch.cat(batched_samples, dim=0) - return batched_samples - - def _apply_awq_scale_clip_block( - self, - block: torch.nn.Module, - block_args: Tuple[Any, ...], - block_kwargs: Dict[str, Any], - ) -> None: - """Search AWQ scale, clipping range and Apply them into a transformer block.""" - # pylint: disable=too-many-locals - - inpsected_mod_types = cast(AWQHook, self.hook).get_inspect_module_types(block) - args_dict, kwargs_dict = collect_inps( - block, - block_args, - block_kwargs, - self.quant_config.device, - tuple([*self.hook.get_linear_layer_types(), *inpsected_mod_types]), - ) - awq_args = cast(AWQConfig, self.quant_config).awq_args - for prev_ops, linear_tuples, module2inspect, module2inspect_name in cast( - AWQHook, self.hook - ).iter_inspect_modules(block): - linear_inp = args_dict[linear_tuples[0][0]][0] - linear_layers = [linear for _, linear in linear_tuples] - - scales = search_module_scale( - module2inspect, - args_dict[module2inspect_name], - kwargs_dict[module2inspect_name], - linear_layers, - linear_inp, - awq_args.quant_group_size, - awq_args.quant_bit, - ) - - apply_module_scale( - prev_ops, - linear_layers, - scales.to(self.quant_config.device), - ) - - for name, _ in linear_tuples: - assert len(args_dict[name]) == 1 - assert torch.equal(args_dict[name][0], linear_inp) - args_dict[name] = (args_dict[name][0].div(scales.view(1, -1)),) - - named_linears = { - name: m - for name, m in block.named_modules() - if isinstance(m, torch.nn.Linear) - } - for name, linear in named_linears.items(): - if any( - ( - avoid in name - for avoid in cast(AWQHook, self.hook).avoid_clipping_layer_names - ) - ): - continue - max_val = search_module_clip( - linear.weight, - args_dict[name][0], - awq_args.quant_group_size, - awq_args.quant_bit, - n_sample_token=self.quant_config.calibration_dataset.num_samples, - ) - apply_module_clip( - max_val.to(self.quant_config.device), - linear, - ) - - def get_input_kwargs_tf_blocks( - self, - model: torch.nn.Module, - ) -> Tuple[List[Tuple[Any, ...]], List[Dict[str, Any]]]: - """Gather input tensor and kwargs from the designated pytorch module.""" - block_args = [] - block_kwargs = [] - - num_tf_blocks = len(self.hook.get_tf_blocks(model)) - progress_bar = tqdm( - range(num_tf_blocks), - total=num_tf_blocks, - 
desc="Collect args for transformer blocks..", - ) - - def hook(m, args, kwargs): # pylint: disable=unused-argument - block_args.append( - tuple( - (t.detach().cpu() if isinstance(t, torch.Tensor) else t) - for t in args - ) - ) - block_kwargs.append( - { - k: (v.detach().cpu() if isinstance(v, torch.Tensor) else v) - for k, v in kwargs.items() - } - ) - progress_bar.update() - - removables = [] - for tf_block in self.hook.get_tf_blocks(model): - removables.append( - tf_block.register_forward_pre_hook(hook, with_kwargs=True) - ) - - batched_samples = self.get_batched_samples() - model(batched_samples.to(self.quant_config.device), use_cache=False) - - for removable in removables: - removable.remove() - - return block_args, block_kwargs - - def get_attributes(self) -> Dict[str, Any]: - """Return the attributes of the converted model.""" - attributes = self.converter.get_attributes() - awq_args = cast(AWQConfig, self.quant_config).awq_args - attributes["quant_scheme"] = self.quant_config.mode.value # awq - attributes["quant_group_size"] = awq_args.quant_group_size - attributes["quant_bit"] = awq_args.quant_bit - return attributes - - @torch.no_grad() - def _apply_awq_scale_clip( - self, - model: torch.nn.Module, - ) -> None: - """Search AWQ scale, clipping range and Apply them into model.""" - # pylint: disable=too-many-locals - model.eval() - with self._try_offload_model(model): - tf_blocks = self.hook.get_tf_blocks(model) - block_args, block_kwargs = self.get_input_kwargs_tf_blocks(model) - - gc.collect() - torch.cuda.empty_cache() - - for block, args, kwargs in tqdm( - zip( - tf_blocks, - block_args, - block_kwargs, - ), - total=len(tf_blocks), - desc="Search and Apply AWQ Scale, Clip range..", - ): - self._apply_awq_scale_clip_block(block, args, kwargs) - gc.collect() - torch.cuda.empty_cache() - - @torch.no_grad() - def pre_quantize( - self, - model: torch.nn.Module, - ) -> None: - """Pre-procedure that should be called before quantize() is called.""" - model = cast(AWQHook, self.hook).add_pre_scaler(model) - self._apply_awq_scale_clip(model) - - @torch.no_grad() - def quantize( - self, - model: torch.nn.Module, - ) -> torch.nn.Module: - """Quantize model with AWQ.""" - model.eval() - for quant_input in tqdm( - self.hook.iter_tf_quant_inputs(model), - total=len(self.hook.get_tf_blocks(model)), - desc="Quantize model..", - ): - assert isinstance(quant_input, TFQuantInputs) - quant_result = cast(AWQHook, self.hook).get_quant_result( - quant_input, quant_config=cast(AWQConfig, self.quant_config) - ) - for field in fields(quant_result): - layer_quant_result = getattr(quant_result, field.name) - if isinstance(layer_quant_result, WeightOnlyQuantResult): - layer = model.get_submodule(layer_quant_result.module_name) - q_layer = WeightOnlyQuantizedLinearLayer.from_layer( - layer, layer_quant_result - ) - quant_result.block.add_module(field.name, q_layer) - - return model diff --git a/friendli/modules/quantizer/awq/models/gpt_neox.py b/friendli/modules/quantizer/awq/models/gpt_neox.py deleted file mode 100644 index 8d48328a..00000000 --- a/friendli/modules/quantizer/awq/models/gpt_neox.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli GPTNeoXForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type - -import torch - -from friendli.enums import ModelDataType -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.quantizer.awq.base import AWQHook -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.utils import scale_reshape - - -class AWQGPTNeoXHook(AWQHook): - """AWQ Hook for GPTNeoXForCausalLM.""" - - def __init__(self, quant_config, converter): - """Initialize AWQGPTNeoXHook.""" - super().__init__(quant_config, converter) - config = converter.config - self.data_type = converter.data_type - self.num_attention_heads = config.num_attention_heads - self.num_kv_attention_heads = config.num_attention_heads - self.hidden_size = config.hidden_size - self.head_size = self.hidden_size // self.num_attention_heads - self.rotary_dim = int(self.head_size * config.rotary_pct) - assert config.use_parallel_residual == True - - def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module: - """Adds scaler to GPTNeoXForCausalLM.""" - for tf_block in self.get_tf_blocks(model): - attn_fc_scaler = self._register_pre_scaler( - tf_block.attention.dense, - ) - tf_block.attention.add_module("scaler", attn_fc_scaler) - ff2_scaler = self._register_pre_scaler(tf_block.mlp.dense_4h_to_h) - tf_block.mlp.add_module("scaler", ff2_scaler) - return model - - def get_inspect_module_types( - self, block: torch.nn.Module - ) -> Tuple[Type[torch.nn.Module], ...]: - """Returns the type of linear layer (etc. qkv, linear layer) in transformer block.""" - return (type(block.attention), type(block.mlp)) - - def iter_inspect_modules( - self, - block: torch.nn.Module, - ) -> Iterator[ - Tuple[ - List[torch.nn.Module], - List[Tuple[ModuleName, torch.nn.Linear]], - torch.nn.Module, - ModuleName, - ] - ]: - """Returns iterator of layers in modules.""" - # qkv proj - yield ( - [block.input_layernorm], - [("attention.query_key_value", block.attention.query_key_value)], - block.attention, - "attention", - ) - # attn out proj - yield ( - [block.attention.scaler], - [("attention.dense", block.attention.dense)], - block.attention.dense, - "attention.dense", - ) - # ff1 - yield ( - [block.post_attention_layernorm], - [("mlp.dense_h_to_4h", block.mlp.dense_h_to_4h)], - block.mlp, - "mlp", - ) - # ff2 - yield ( - [block.mlp.scaler], - [("mlp.dense_4h_to_h", block.mlp.dense_4h_to_h)], - block.mlp.dense_4h_to_h, - "mlp.dense_4h_to_h", - ) - - def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of GPTNeoXForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - qkv_weight = self.converter.qkv_weight_reshape( - [decoder_layer.attention.query_key_value.weight] - ).transpose( - 0, 1 - ) # [OutDim, InDim] - attn_weight_outdim = qkv_weight.size(0) # OutDim - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attention.query_key_value", - 0, - attn_weight_outdim // 3, - ), - k=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attention.query_key_value", - attn_weight_outdim // 3, - attn_weight_outdim // 3 * 2, - ), - v=QuantInput( - qkv_weight, - 
f"{self.quantized_layer_prefix}{index}.attention.query_key_value", - attn_weight_outdim // 3 * 2, - attn_weight_outdim, - ), - attn_fc=QuantInput( - decoder_layer.attention.dense.weight, - f"{self.quantized_layer_prefix}{index}.attention.dense", - None, - None, - ), - ff1=QuantInput( - decoder_layer.mlp.dense_h_to_4h.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h", - None, - None, - ), - ff2=QuantInput( - decoder_layer.mlp.dense_4h_to_h.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in GPTNeoXForCausalLM.""" - return (torch.nn.Linear,) - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in GPTNeoXForCausalLM.""" - return model.gpt_neox.layers # type: ignore - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified layers.""" - convert_info_list = [] - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}attention.scaler.scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_proj/awq/pre_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}mlp.scaler.scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_proj/awq/pre_scale:0", - reshape_fn=scale_reshape, - ), - ] - ) - return convert_info_list - - @property - def avoid_clipping_layer_names(self) -> List[str]: - """Returns the layer names which should be avoided for AWQ clipping.""" - return ["query_key_value"] diff --git a/friendli/modules/quantizer/awq/models/gptj.py b/friendli/modules/quantizer/awq/models/gptj.py deleted file mode 100644 index da2e81dc..00000000 --- a/friendli/modules/quantizer/awq/models/gptj.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli GPTJForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type - -import torch - -from friendli.enums import ModelDataType -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.quantizer.awq.base import AWQHook -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.utils import scale_reshape - - -class AWQGPTJHook(AWQHook): - """AWQ Hook for GPTJForCausalLM.""" - - def __init__(self, quant_config, converter): - """Initialize AWQGPTJHook.""" - super().__init__(quant_config, converter) - config = converter.config - self.data_type = converter.data_type - self.num_attention_heads = config.num_attention_heads - self.num_kv_attention_heads = config.num_attention_heads - self.hidden_size = config.hidden_size - self.head_size = self.hidden_size // self.num_attention_heads - self.rotary_dim = config.rotary_dim - - def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module: - """Adds scaler to GPTJForCausalLM.""" - for tf_block in self.get_tf_blocks(model): - ff2_scaler = self._register_pre_scaler(tf_block.mlp.fc_out) - tf_block.mlp.add_module("ff2_scaler", ff2_scaler) - return model - - def get_inspect_module_types( - self, block: torch.nn.Module - ) -> Tuple[Type[torch.nn.Module], ...]: - """Returns the type of linear layer (etc. qkv, linear layer) in transformer block.""" - return (type(block.attn), type(block.mlp), type(block)) - - def iter_inspect_modules( - self, - block: torch.nn.Module, - ) -> Iterator[ - Tuple[ - List[torch.nn.Module], - List[Tuple[ModuleName, torch.nn.Linear]], - torch.nn.Module, - ModuleName, - ] - ]: - """Returns iterator of layers in modules.""" - # qkv proj - yield ( - [block.ln_1], - [ - ("attn.q_proj", block.attn.q_proj), - ("attn.k_proj", block.attn.k_proj), - ("attn.v_proj", block.attn.v_proj), - ("mlp.fc_in", block.mlp.fc_in), - ], - block, - "", - ) - # attn out proj - yield ( - [block.attn.v_proj], - [("attn.out_proj", block.attn.out_proj)], - block.attn.out_proj, - "attn.out_proj", - ) - # ff2 - yield ( - [block.mlp.ff2_scaler], - [("mlp.fc_out", block.mlp.fc_out)], - block.mlp.fc_out, - "mlp.fc_out", - ) - - def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of GPTJForCausalLM.""" - for index, tf_block in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - yield TFQuantInputs( - layer_index=index, - block=tf_block, - q=QuantInput( - tf_block.attn.q_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.q_proj", - None, - None, - ), - k=QuantInput( - tf_block.attn.k_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.k_proj", - None, - None, - ), - v=QuantInput( - tf_block.attn.v_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.v_proj", - None, - None, - ), - attn_fc=QuantInput( - tf_block.attn.out_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - None, - None, - ), - ff1=QuantInput( - tf_block.mlp.fc_in.weight, - f"{self.quantized_layer_prefix}{index}.mlp.fc_in", - None, - None, - ), - ff2=QuantInput( - tf_block.mlp.fc_out.weight, - f"{self.quantized_layer_prefix}{index}.mlp.fc_out", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in 
GPTJForCausalLM.""" - return (torch.nn.Linear,) - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in GPTJForCausalLM.""" - return model.transformer.h # type: ignore - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified layers.""" - convert_info_list = [] - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.append( - ConvertInfo( - param_names=[f"{layer_prefix}mlp.ff2_scaler.scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_proj/awq/pre_scale:0", - reshape_fn=scale_reshape, - ) - ) - return convert_info_list - - @property - def avoid_clipping_layer_names(self) -> List[str]: - """Returns the layer names which should be avoided for AWQ clipping.""" - return ["q_proj", "k_proj"] diff --git a/friendli/modules/quantizer/awq/models/llama.py b/friendli/modules/quantizer/awq/models/llama.py deleted file mode 100644 index f59bc0cf..00000000 --- a/friendli/modules/quantizer/awq/models/llama.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli LlamaForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any, Iterator, List, Tuple, Type, cast - -import torch - -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.quantizer.awq.base import AWQHook -from friendli.modules.quantizer.schema.config import AWQConfig -from friendli.modules.quantizer.schema.data import ( - ModuleName, - QuantInput, - TFQuantInputs, - TFQuantResults, - WeightOnlyQuantResult, -) -from friendli.modules.quantizer.utils import ( - get_weight_only_quant_scales, - quantized_linear_weight_reshape, - scale_reshape, -) - - -@dataclass -class LlamaTFQuantInputs(TFQuantInputs): - """Dataclass for quantization input per layer in LlamaForCausalLM.""" - - ff_gate: QuantInput - - -@dataclass -class LlamaTFQuantResults(TFQuantResults): - """Dataclass for quantization result per layer in LlamaForCausalLM.""" - - ff_gate: WeightOnlyQuantResult - - -class AWQLlamaHook(AWQHook): - """AWQ Hook for LlamaForCausalLM.""" - - def __init__(self, quant_config, converter): - """Initialize AWQLlamaHook.""" - super().__init__(quant_config, converter) - config = converter.config - self.data_type = converter.data_type - self.num_attention_heads = config.num_attention_heads - if config.num_key_value_heads is None: - self.num_kv_attention_heads = self.num_attention_heads - else: - self.num_kv_attention_heads = config.num_key_value_heads - self.hidden_size = config.hidden_size - self.head_size = self.hidden_size // self.num_attention_heads - self.rotary_dim = self.head_size - self.scale_attn_fc = self.num_attention_heads == self.num_kv_attention_heads - - def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module: - """Adds scaler to LlamaForCausalLM.""" - return model - - def get_inspect_module_types( - self, block: torch.nn.Module - ) -> Tuple[type[torch.nn.Module], ...]: - """Returns the layer types in inspected blocks.""" - return (type(block.self_attn), type(block.mlp)) - - def iter_inspect_modules( - self, - block: torch.nn.Module, - ) -> Iterator[ - Tuple[ - List[torch.nn.Module], - 
List[Tuple[ModuleName, torch.nn.Linear]], - torch.nn.Module, - ModuleName, - ] - ]: - """Returns iterator of layers in blocks.""" - # qkv proj - yield ( - [block.input_layernorm], - [ - ("self_attn.q_proj", block.self_attn.q_proj), - ("self_attn.k_proj", block.self_attn.k_proj), - ("self_attn.v_proj", block.self_attn.v_proj), - ], - block.self_attn, - "self_attn", - ) - # attn out proj - if self.scale_attn_fc: - yield ( - [block.self_attn.v_proj], - [("self_attn.o_proj", block.self_attn.o_proj)], - block.self_attn.o_proj, - "self_attn.o_proj", - ) - # ff1 - yield ( - [block.post_attention_layernorm], - [ - ("mlp.up_proj", block.mlp.up_proj), - ("mlp.gate_proj", block.mlp.gate_proj), - ], - block.mlp, - "mlp", - ) - # ff2 - yield ( - [block.mlp.up_proj], - [("mlp.down_proj", block.mlp.down_proj)], - block.mlp.down_proj, - "mlp.down_proj", - ) - - def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of LlamaForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.self_attn - q_weight, k_weight, v_weight = ( - self.converter.qkv_weight_reshape( - [ - self_attn.q_proj.weight, - self_attn.k_proj.weight, - self_attn.v_proj.weight, - ] - ) - .transpose(0, 1) - .split( - [ - self.converter.decoder_num_attention_heads - * self.converter.decoder_head_size, - self.converter.decoder_num_kv_attention_heads - * self.converter.decoder_head_size, - self.converter.decoder_num_kv_attention_heads - * self.converter.decoder_head_size, - ], - dim=0, - ) - ) - fc1 = decoder_layer.mlp.up_proj - ff_gate = decoder_layer.mlp.gate_proj - fc2 = decoder_layer.mlp.down_proj - - yield LlamaTFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - q_weight, - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", - None, - None, - ), - k=QuantInput( - k_weight, - f"{self.quantized_layer_prefix}{index}.self_attn.k_proj", - None, - None, - ), - v=QuantInput( - v_weight, - f"{self.quantized_layer_prefix}{index}.self_attn.v_proj", - None, - None, - ), - attn_fc=QuantInput( - self_attn.o_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.mlp.up_proj", - None, - None, - ), - ff_gate=QuantInput( - ff_gate.weight, - f"{self.quantized_layer_prefix}{index}.mlp.gate_proj", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.mlp.down_proj", - None, - None, - ), - ) - - def get_quant_result( - self, - quant_input: TFQuantInputs, - **kwargs: Any, - ) -> TFQuantResults: - """Get quantization result for a specific layer in LlamaForCausalLM.""" - awq_config = cast(AWQConfig, self.quant_config) - - def get_scale(quant_input: QuantInput) -> WeightOnlyQuantResult: - weight, name, start, end = ( - quant_input.weight, - quant_input.name, - quant_input.start_offset, - quant_input.end_offset, - ) - weight = weight.to(awq_config.device) - - return get_weight_only_quant_scales( - layer_name=name, - w=weight[start:end], - q_bit=awq_config.awq_args.quant_bit, - q_group_size=awq_config.awq_args.quant_group_size, - ) - - quant_input = cast(LlamaTFQuantInputs, quant_input) - return LlamaTFQuantResults( - layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_input.layer_index}.", - block=quant_input.block, - q=get_scale(quant_input.q), - k=get_scale(quant_input.k), - 
v=get_scale(quant_input.v), - attn_fc=get_scale(quant_input.attn_fc), - ff1=get_scale(quant_input.ff1), - ff_gate=get_scale(quant_input.ff_gate), - ff2=get_scale(quant_input.ff2), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in LlamaForCausalLM.""" - return (torch.nn.Linear,) - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in LlamaForCausalLM.""" - return model.model.layers - - @property - def quantized_param_names(self) -> List[str]: - """Returns the parameter names in LlamaForCausalLM.""" - param_names = super().quantized_param_names - for i in range(self.converter.decoder_layer_num): - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - param_names.append( - f"{converted_prefix}mlp/c_gate/weight:0", - ) - return param_names - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified layers.""" - return [] - - @property - def avoid_clipping_layer_names(self) -> List[str]: - """Returns the layer names which should be avoided for AWQ clipping.""" - return ["q_proj", "k_proj"] - - @property - def quantized_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the convert_info_list for quantized layers.""" - convert_info_list = super().quantized_convert_info_list - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ff_gate.weight_scale"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_gate/awq/scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff_gate.zeros"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}mlp/c_gate/awq/zero:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff_gate.weight"], - data_type=self.quant_dtype, - converted_name=f"{converted_prefix}mlp/c_gate/awq/weight:0", - reshape_fn=quantized_linear_weight_reshape, - ), - ] - ) - return convert_info_list diff --git a/friendli/modules/quantizer/awq/models/mpt.py b/friendli/modules/quantizer/awq/models/mpt.py deleted file mode 100644 index 6c60ca58..00000000 --- a/friendli/modules/quantizer/awq/models/mpt.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli MPTForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type - -import torch - -from friendli.enums import ModelDataType -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.quantizer.awq.base import AWQHook -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.utils import scale_reshape - - -class AWQMPTHook(AWQHook): - """AWQ Hook for MPTForCausalLM.""" - - def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module: - """Adds scaler to MPTForCausalLM.""" - for tf_block in self.get_tf_blocks(model): - attn_fc_scaler = self._register_pre_scaler( - tf_block.attn.out_proj, - ) - tf_block.attn.add_module("scaler", attn_fc_scaler) - ff2_scaler = self._register_pre_scaler(tf_block.ffn.down_proj) - tf_block.ffn.add_module("scaler", ff2_scaler) - return model - - def get_inspect_module_types( - self, block: torch.nn.Module - ) -> Tuple[Type[torch.nn.Module], ...]: - """Returns the type of linear layer (etc. qkv, linear layer) in transformer block.""" - return (type(block.attn), type(block.ffn)) - - def iter_inspect_modules( - self, - block: torch.nn.Module, - ) -> Iterator[ - Tuple[ - List[torch.nn.Module], - List[Tuple[ModuleName, torch.nn.Linear]], - torch.nn.Module, - ModuleName, - ] - ]: - """Returns iterator of layers in modules.""" - # qkv proj - yield ( - [block.norm_1], - [("attn.Wqkv", block.attn.Wqkv)], - block.attn, - "attn", - ) - # attn out proj - yield ( - [block.attn.scaler], - [("attn.out_proj", block.attn.out_proj)], - block.attn.out_proj, - "attn.out_proj", - ) - # ff1 - yield ( - [block.norm_2], - [("ffn.up_proj", block.ffn.up_proj)], - block.ffn, - "ffn", - ) - # ff2 - yield ( - [block.ffn.scaler], - [("ffn.down_proj", block.ffn.down_proj)], - block.ffn.down_proj, - "ffn.down_proj", - ) - - def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of MPTForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.attn - q_outdim = ( - self.converter.decoder_num_attention_heads - * self.converter.decoder_head_size - ) - kv_outdim = ( - self.converter.decoder_num_kv_attention_heads - * self.converter.decoder_head_size - ) - qkv_outdim = self_attn.Wqkv.weight.size(0) - assert qkv_outdim == q_outdim + kv_outdim * 2 - fc1 = decoder_layer.ffn.up_proj # type: ignore - fc2 = decoder_layer.ffn.down_proj # type: ignore - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - self_attn.Wqkv.weight, # type: ignore - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - 0, - q_outdim, - ), - k=QuantInput( - self_attn.Wqkv.weight, # type: ignore - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - q_outdim, - q_outdim + kv_outdim, - ), - v=QuantInput( - self_attn.Wqkv.weight, # type: ignore - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - q_outdim + kv_outdim, - qkv_outdim, - ), - attn_fc=QuantInput( - self_attn.out_proj.weight, # type: ignore - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight, # type: ignore - f"{self.quantized_layer_prefix}{index}.ffn.up_proj", - None, - None, - ), - ff2=QuantInput( - fc2.weight, # type: ignore - 
f"{self.quantized_layer_prefix}{index}.ffn.down_proj", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in MPTForCausalLM.""" - return (torch.nn.Linear,) - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in MPTForCausalLM.""" - return model.transformer.blocks # type: ignore - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified layers.""" - convert_info_list = [] - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}attn.scaler.scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_proj/awq/pre_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ffn.scaler.scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_proj/awq/pre_scale:0", - reshape_fn=scale_reshape, - ), - ] - ) - return convert_info_list - - @property - def avoid_clipping_layer_names(self) -> List[str]: - """Returns the layer names which should be avoided for AWQ clipping.""" - return ["Wqkv"] diff --git a/friendli/modules/quantizer/awq/utils.py b/friendli/modules/quantizer/awq/utils.py deleted file mode 100644 index c6efdec4..00000000 --- a/friendli/modules/quantizer/awq/utils.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -# Copyright (c) 2023 MIT HAN Lab -# MIT License - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -"""Friendli AWQ Quantizer Util.""" - -from __future__ import annotations - -import gc -from typing import Any, Dict, Iterable, List, Tuple - -import torch - - -def pseudo_quantize_tensor(w: torch.Tensor, q_bit: int = 8, q_group_size: int = -1): - """Pseudo quantize tensor.""" - org_w_shape = w.shape - w = w.reshape(-1, q_group_size) - max_val = w.amax(dim=1, keepdim=True) - min_val = w.amin(dim=1, keepdim=True) - max_int = 2**q_bit - 1 - min_int = 0 - scales = (max_val - min_val).clamp(min=1e-5) / max_int - zeros = (-torch.round(min_val / scales)).clamp_(min_int, max_int) - - assert torch.isnan(scales).sum() == 0 - assert torch.isnan(w).sum() == 0 - - w = ( - torch.clamp(torch.round(w / scales) + zeros, min_int, max_int) - zeros - ) * scales - assert torch.isnan(w).sum() == 0 - - w = w.reshape(org_w_shape) - - return w - - -@torch.no_grad() -def get_weight_scale(weight: torch.Tensor, q_group_size=-1): - """Get weight scale for AWQ.""" - org_shape = weight.shape - if q_group_size > 0: - weight = weight.view(-1, q_group_size) - scale = weight.abs() / weight.abs().amax(dim=1, keepdim=True) - scale = scale.view(org_shape) - scale = scale.mean(0) - return scale - - -@torch.no_grad() -def get_act_scale(x): - """Get activation scale for AWQ.""" - return x.abs().view(-1, x.shape[-1]).mean(0) - - -def search_module_scale( - module: torch.nn.Module, - module_args: Tuple[Any, ...], - module_kwargs: Dict[str, Any], - linears2scale: Iterable[torch.nn.Linear], - linear_inp: torch.Tensor, - q_group_size: int, - q_bit: int, -) -> torch.Tensor: - """Search the AWQ scale for a module.""" - # pylint: disable=too-many-locals - weight = torch.cat([_m.weight for _m in linears2scale], dim=0) # type: ignore - with torch.no_grad(): - org_out = module(*module_args, **module_kwargs) - if isinstance(org_out, tuple): - org_out = org_out[0] - - x_max = get_act_scale(linear_inp) - w_max = get_weight_scale(weight, q_group_size) - del weight - gc.collect() # type: ignore - torch.cuda.empty_cache() - - best_error = float("inf") - best_scales = torch.zeros(x_max.shape[0], device=x_max.device) - n_grid = 20 - history = [] - org_sd = {k: v.to("cpu", copy=True) for k, v in module.state_dict().items()} - for grid in range(n_grid): - ratio = grid * 1.0 / n_grid - scales = (x_max.pow(ratio) / w_max.pow(1 - ratio)).clamp(min=1e-4).view(-1) - scales = scales / (scales.max() * scales.min()).sqrt() - for fc in linears2scale: - fc.weight.mul_(scales.view(1, -1).to(fc.weight.device)) # type: ignore - fc.weight.data = pseudo_quantize_tensor( - w=fc.weight.data, # type: ignore - q_bit=q_bit, - q_group_size=q_group_size, - ) / (scales.view(1, -1)) - - out = module(*module_args, **module_kwargs) - if isinstance(out, tuple): - out = out[0] - - loss = (org_out - out).float().pow(2).mean().item() # float prevents overflow - history.append(loss) - is_best = loss < best_error - if is_best: - best_error = loss - best_scales = scales - module.load_state_dict(org_sd) - best_scales = best_scales.view(-1) - - assert torch.isnan(best_scales).sum() == 0, best_scales - return best_scales.detach() - - -def apply_module_scale( - prev_ops: List[torch.nn.Module], - linear_layers: Iterable[torch.nn.Linear], - scales: torch.Tensor, -) -> None: - """Apply AWQ Scale for Module, and return the scaled input for Clipping.""" - for prev_op in prev_ops: - for _, param in prev_op.named_parameters(recurse=False): - if isinstance(prev_op, torch.nn.Linear): - # TODO: handle bias - assert len(param.data.shape) == 2 - param.data.div_(scales.view(-1, 1)) - else: - 
assert param.data.shape == scales.shape - param.data.div_(scales) - - for layer in linear_layers: - layer.weight.data.mul_(scales.view(1, -1)) - - -def search_module_clip( - w: torch.Tensor, - inp: torch.Tensor, - q_group_size: int, - q_bit: int, - n_grid=20, - max_shrink=0.5, - n_sample_token=512, -) -> torch.Tensor: - """Search the best clip for a module.""" - # pylint: disable=too-many-locals - # w [co, ci] -> [co, 1, n_group, group size] - # inp [n_token, ci] -> [1, n_token, n_group, group size] - w = w.view(w.shape[0], 1, -1, q_group_size) - - inp = inp.view(-1, inp.shape[-1]) - inp = inp.reshape(1, inp.shape[0], -1, q_group_size) - inp = inp[:, 0 :: inp.shape[1] // n_sample_token] - - oc_batch_size = 256 if w.shape[0] % 256 == 0 else 64 # prevent OOM - assert w.shape[0] % oc_batch_size == 0 - w_all = w - best_max_val_all = [] - - for i_b in range(w.shape[0] // oc_batch_size): - w = w_all[i_b * oc_batch_size : (i_b + 1) * oc_batch_size] - - org_max_val = w.abs().amax(dim=-1, keepdim=True) # co, 1, n_group, 1 - - best_max_val = org_max_val.clone() - min_errs = torch.ones_like(org_max_val) * 1e9 - inp = inp.to(w.device) - org_out = (inp * w).sum(dim=-1) # co, n_token, n_group - - for i_s in range(int(max_shrink * n_grid)): - max_val = org_max_val * (1 - i_s / n_grid) - min_val = -max_val - cur_w = torch.clamp(w, min_val, max_val) - q_w = pseudo_quantize_tensor( - w=cur_w, - q_bit=q_bit, - q_group_size=q_group_size, - ) - cur_out = (inp * q_w).sum(dim=-1) - - # co, 1, n_group, 1 - err = (cur_out - org_out).pow(2).mean(dim=1).view(min_errs.shape) - del cur_w - del cur_out - cur_best_idx = err < min_errs - min_errs[cur_best_idx] = err[cur_best_idx] - best_max_val[cur_best_idx] = max_val[cur_best_idx] - best_max_val_all.append(best_max_val) - - best_max_val = torch.cat(best_max_val_all, dim=0) - - del inp - del org_out - gc.collect() - torch.cuda.empty_cache() - - return best_max_val.squeeze(1) - - -def apply_module_clip( - max_val: torch.Tensor, - layer: torch.nn.Linear, -): - """Apply AWQ Clip for Module.""" - max_val = max_val.to(layer.weight.device) # type: ignore - org_shape = layer.weight.shape - layer.weight.data = layer.weight.data.reshape(*max_val.shape[:2], -1) # type: ignore - layer.weight.data = torch.clamp(layer.weight.data, -max_val, max_val) - layer.weight.data = layer.weight.data.reshape(org_shape) # type: ignore diff --git a/friendli/modules/quantizer/base.py b/friendli/modules/quantizer/base.py deleted file mode 100644 index ea97e092..00000000 --- a/friendli/modules/quantizer/base.py +++ /dev/null @@ -1,507 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Quantizer Base.""" - -from __future__ import annotations - -import os -from abc import ABC, abstractmethod -from collections.abc import Generator -from contextlib import contextmanager -from typing import Any, Dict, Iterator, List, Tuple, Type, Union, cast - -import datasets # type: ignore[import] -import huggingface_hub # type: ignore[import] -import numpy as np -import torch -from torch.nn.modules import Module -from tqdm import tqdm - -from friendli.enums import ( - QuantDatasetFormat, # TODO: move this to friendli/modules/converter/enums.py -) -from friendli.enums import ModelDataType -from friendli.errors import NotSupportedQuantConfigError -from friendli.logging import logger -from friendli.modules.converter.base import DECODER_PREFIX, OneOfConverter -from friendli.modules.converter.interface import ModelConversionInterface -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import get_tokenizer, get_torch_data_type -from friendli.modules.quantizer.layers import WeightActQuantizedLinearLayer -from friendli.modules.quantizer.schema.config import OneOfQuantConfig -from friendli.modules.quantizer.schema.data import ( - HFTFQuantInputs, - ModuleName, - TFQuantInputs, - TFQuantResults, - WeightActQuantResult, -) -from friendli.modules.quantizer.utils import ( - collect_stats, - offload_module_sequence, - safe_load_datasets, - send_model_to_device, -) - - -class AbstractQuantHook(ABC): - """Quantization Hook for a specific model architecture.""" - - def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter): - """Initialize the Quantization Hook. - - Args: - quant_config: Quantization configuration. - converter (OneOfConverter): Converter for a specific model architecture. - """ - self.quant_config = quant_config - self.converter = converter - - @abstractmethod - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks.""" - - @abstractmethod - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the type of linear layer (etc. 
qkv, linear layer) in transformer block.""" - - @abstractmethod - def iter_tf_quant_inputs( - self, model: torch.nn.Module - ) -> Union[Iterator[TFQuantInputs], Iterator[HFTFQuantInputs]]: - """Returns the layers which should be quantized in transformer blocks.""" - - @abstractmethod - def get_quant_result( - self, - quant_inputs: TFQuantInputs, - **kwargs: Any, - ) -> TFQuantResults: - """Returns the quantization result of the layer.""" - - @property - @abstractmethod - def quantized_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for quantized layers.""" - - @property - @abstractmethod - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified layers.""" - - @property - def quantized_layer_prefix(self) -> str: - """Returns the prefix of the transformer block name.""" - return self.converter.decoder_layer_prefix - - @property - def quantized_param_names(self) -> List[str]: - """Return the parameter names of quantized layers.""" - param_names = [] - for i in range(self.converter.decoder_layer_num): - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - param_names.append(f"{converted_prefix}attn/c_attn/weight:0") - param_names.append(f"{converted_prefix}attn/c_proj/weight:0") - param_names.append(f"{converted_prefix}mlp/c_fc/weight:0") - param_names.append(f"{converted_prefix}mlp/c_proj/weight:0") - - return param_names - - -class AbstractQuantizer(ABC): - """Abstract Quantizer for a specific model architecture.""" - - def __init__( - self, - hook: AbstractQuantHook, - config: OneOfQuantConfig, - converter: OneOfConverter, - ): - """Initialize the Quantizer. - - Args: - hook (AbstractQuantHook): Quantization Hook for a specific model architecture - config (CommonQuantConfig): Quantization configuration. - converter (OneOfConverter): Converter for a specific model architecture. 
- - """ - self.hook = hook - self.quant_config = config - self.converter = converter - - @abstractmethod - def get_calib_dataset( - self, - ) -> datasets.Dataset: - """Get calibration dataset.""" - - @abstractmethod - def pre_quantize( - self, - model: torch.nn.Module, - ) -> None: - """Pre-procedure that should be called before quantize() is called.""" - - @abstractmethod - def quantize( - self, - model: torch.nn.Module, - ) -> torch.nn.Module: - """Setting Quantizer from config and Quantize model.""" - - -class CommonQuantizer(AbstractQuantizer, ModelConversionInterface): - """Common Quantizer.""" - - def check_config(self) -> None: - """Check if the quantization config is valid.""" - self.converter.check_config() - calibration_dataset_config = self.quant_config.calibration_dataset - data_path_or_name = calibration_dataset_config.path_or_name - percentile = self.quant_config.percentile - if percentile <= 0 or percentile > 100: - raise NotSupportedQuantConfigError( - invalid_option=str(percentile), - valid_options=["0 < percentile <= 100"], - ) - if not os.path.exists(data_path_or_name): - data_name = data_path_or_name.split(":")[0] - if data_name not in ( - data.id for data in huggingface_hub.list_datasets(search=data_name) - ): - raise NotSupportedQuantConfigError( - invalid_option=data_name, - valid_options=["datasets on the huggingface hub", "local path"], - ) - else: - if calibration_dataset_config.format not in QuantDatasetFormat: - raise NotSupportedQuantConfigError( - invalid_option=calibration_dataset_config.format, - valid_options=list(QuantDatasetFormat), - ) - try: - torch.device(self.quant_config.device) - except ValueError as err: - raise NotSupportedQuantConfigError( - invalid_option=self.quant_config.device, - valid_options=["cpu", "cuda"], - ) from err - - def get_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Get List of the convert informations for the model.""" - convert_info_list = self.converter.get_convert_info_list() - new_convert_info_list = [] - for convert_info in convert_info_list: - if convert_info.converted_name in self.hook.quantized_param_names: - continue - new_convert_info_list.append(convert_info) - - return ( - new_convert_info_list - + self.hook.quantized_convert_info_list - + self.hook.modified_layers_convert_info_list - ) - - def get_attributes(self) -> Dict[str, Any]: - """Return the attributes of the converted model.""" - return self.converter.get_attributes() - - @contextmanager - def _try_offload_model(self, model: torch.nn.Module): - if not self.quant_config.offload: - logger.info("Offloading not enabled. Skipping.") - model.to(self.quant_config.device) - yield - else: - logger.info("Offloading enabled.") - tf_blocks = self.hook.get_tf_blocks(model) - send_model_to_device(model, self.quant_config.device, exclude=tf_blocks) - with offload_module_sequence(tf_blocks, self.quant_config.device): - yield - - def convert( - self, - model: torch.nn.Module, - convert_info_list: List[ConvertInfo], - save_numpy_format: bool = True, - ) -> Generator[Tuple[str, Union[np.ndarray, torch.Tensor]], None, None]: - """Convert Huggingface Model to Friendli format(.h5). - - Args: - model (torch.nn.Module): Huggingface model. - state_dict (Dict[str, torch.Tensor]): - Dictionary of mapping of tensor name to tensor - convert_info_list (List[ConvertInfo]): - Dictionary of mapping converted params name to conversion functions. - save_numpy_format (bool, optional): Save the converted tensor in numpy format. - Defaults to True. 
- """ - self.pre_quantize(model) - model = self.quantize(model) - yield from self.converter.convert(model, convert_info_list, save_numpy_format) - - -class FP8QuantHook(AbstractQuantHook): - """Quantization Hook for FP8Quantizer.""" - - def pre_quantize(self, model: Module) -> torch.nn.Module: # type: ignore[] - """Pre-procedure that should be called before quantize() is called in FP8Quantizer.""" - return model - - def post_quantize(self, model: Module) -> torch.nn.Module: - """Post-procedure that should be called after quantize() is called in FP8Quantizer.""" - return model - - def get_quant_result( - self, quant_inputs: TFQuantInputs, **kwargs: Any - ) -> TFQuantResults: - """Returns the quantization result of the layer.""" - raise NotImplementedError - - def get_quantized_param_names(self, model: torch.nn.Module) -> List[str]: - """Return the parameter names of quantized layers.""" - quantized_param_names = [] - for tf_quant_input in self.iter_tf_quant_inputs(model): - assert isinstance(tf_quant_input, HFTFQuantInputs) - for quant_input in tf_quant_input.quant_inputs: - for target_name in quant_input.target_names: - quantized_param_names.append(f"{target_name}.weight") - return quantized_param_names - - def get_quantized_param_scale_names(self, model): - """Return the parameter scale names of quantized layers.""" - quantized_param_scale_names = [] - for tf_quant_input in self.iter_tf_quant_inputs(model): - assert isinstance(tf_quant_input, HFTFQuantInputs) - for quant_input in tf_quant_input.quant_inputs: - for target_name in quant_input.target_names: - quantized_param_scale_names.append(f"{target_name}.weight_scale") - quantized_param_scale_names.append(f"{target_name}.in_scale") - return quantized_param_scale_names - - @property - def quantized_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for quantized layers.""" - raise NotImplementedError - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified layers.""" - raise NotImplementedError - - -class FP8Quantizer(CommonQuantizer): - """FP8Quantizer for huggingface format. - - This quantizer supports per-tensor weight-activation quantization by - using calibration dataset. It adds quantization scale, and quantized - parameter to the checkpoint, while preserves parameter shape, and name - in huggingface checkpoint. 
- """ - - def get_calib_dataset(self) -> datasets.Dataset: - """Get calibration dataset.""" - data_cfg = self.quant_config.calibration_dataset - tokenizer = get_tokenizer(self.converter.config.name_or_path) - dataset = safe_load_datasets(data_cfg) - - dataset = ( - dataset.shuffle(self.quant_config.seed) - .select(range(data_cfg.num_samples)) - .select_columns([data_cfg.lookup_column_name]) - ) - - encoded_dataset = tokenizer( - dataset[data_cfg.lookup_column_name], - return_tensors="pt", - padding=True, - truncation=True, - max_length=data_cfg.max_length, - ) - return encoded_dataset["input_ids"] - - def get_convert_info_list(self) -> List[ConvertInfo]: - """Not used in FP8Quantizer.""" - return [] - - def pre_quantize(self, model: Module) -> None: - """Not used in FP8Quantizer.""" - return None - - def _get_weight_act_quantize_results( - self, - model: torch.nn.Module, - names: List[ModuleName], - max_input_stats: Dict[ModuleName, torch.Tensor], - ) -> List[WeightActQuantResult]: - """Get the quantization scales and quantized_weight for a specific layer.""" - assert ( - self.quant_config.quant_dtype == ModelDataType.FP8_E4M3 - ), "currently support fp8_e4m3" - max_val = 448.0 - min_val = -448.0 - input_max = None - for name in names: - input_max = max_input_stats.get(name) - if input_max is not None: - break - assert input_max is not None - target_weights = [model.get_submodule(name).weight for name in names] - target_weight = torch.concat(target_weights) - - act_scale = float(input_max.detach().abs().max().item()) / float(max_val) - weight_scale = float(target_weight.detach().abs().max().item()) / float(max_val) - - q_weights = [ - ( - (weight.detach().float() / weight_scale) - .clip(min_val, max_val) - .to(torch.float8_e4m3fn) - .view(torch.int8) - .to("cpu") - ) - for weight in target_weights - ] - return [ - WeightActQuantResult( - name, - quant_dtype=self.quant_config.quant_dtype, - act_scale=torch.tensor(act_scale, dtype=torch.float32), - weight_scale=torch.tensor(weight_scale, dtype=torch.float32), - q_weight=q_weight, - q_group_size=-1, - zero_point=torch.tensor(0.0), - ) - for name, q_weight in zip(names, q_weights) - ] - - @torch.no_grad() - def quantize( - self, - model: torch.nn.Module, - ) -> torch.nn.Module: - """Quantize model to lower data type. 
Currently supports FP8.""" - # pylint: disable=too-many-locals - dataset = self.get_calib_dataset() - model.eval() - with self._try_offload_model(model): - max_input_stats, _ = collect_stats( - model, - self.quant_config.device, - dataset, - cast(FP8QuantHook, self.hook).get_linear_layer_types(), - percentile=self.quant_config.percentile, - tqdm_desc="Collecting stats for Static Quantization.", - batch_size=32, - ) - for tf_quant_input in tqdm( - self.hook.iter_tf_quant_inputs(model), - total=len(self.hook.get_tf_blocks(model)), - desc="Quantize", - unit="layer", - ): - assert isinstance(tf_quant_input, HFTFQuantInputs) - for quant_input in tf_quant_input.quant_inputs: - parent_module, local_names, names = ( - quant_input.parent_module, - quant_input.local_names, - quant_input.target_names, - ) - - if isinstance(parent_module, torch.nn.ModuleList): - # For MoE models with seperate expert layers - parent_modules_w_local_name = [] - for p_module in parent_module: - for local_name in local_names: - parent_modules_w_local_name.append( - (p_module, local_name) - ) - - layers = [ - p_module.get_submodule(local_name) - for p_module, local_name in parent_modules_w_local_name - ] - - quant_results = self._get_weight_act_quantize_results( - model, - names, - max_input_stats, - ) - q_layers = [ - WeightActQuantizedLinearLayer.from_layer( - layer, quant_result - ) - for layer, quant_result in zip(layers, quant_results) - ] - for (p_module, local_name), q_layer in zip( - parent_modules_w_local_name, q_layers - ): - setattr(p_module, local_name, q_layer) - - else: - layers = [ - parent_module.get_submodule(local_name) - for local_name in local_names - ] - quant_results = self._get_weight_act_quantize_results( - model, - names, - max_input_stats, - ) - q_layers = [ - WeightActQuantizedLinearLayer.from_layer( - layer, quant_result - ) - for layer, quant_result in zip(layers, quant_results) - ] - for local_name, q_layer in zip(local_names, q_layers): - setattr(parent_module, local_name, q_layer) - - return model - - def convert( # type: ignore[override] - self, - model: torch.nn.Module, - convert_info_list: List[ConvertInfo], - save_numpy_format: bool = False, - ) -> Generator[Tuple[str, Union[torch.Tensor, np.ndarray]], None, None]: - """Convert Huggingface Model to Friendli format(.h5). - - Args: - model (torch.nn.Module): Huggingface model. - state_dict (Dict[str, torch.Tensor]): - Dictionary of mapping of tensor name to tensor - convert_info_list (List[ConvertInfo]): - Dictionary of mapping converted params name to conversion functions. - It will be depreciated. - save_numpy_format (bool, optional): Save the converted tensor in numpy format. - It will be depreciated. 
- """ - model = cast(FP8QuantHook, self.hook).pre_quantize(model) - model = self.quantize(model) - model = cast(FP8QuantHook, self.hook).post_quantize(model) - state_dict: Dict[str, torch.Tensor] = model.state_dict() - - quantized_param_names = cast(FP8QuantHook, self.hook).get_quantized_param_names( - model - ) - quantized_param_names.extend( - cast(FP8QuantHook, self.hook).get_quantized_param_scale_names(model) - ) - - with tqdm(total=len(state_dict), desc="Converting", unit="tensor") as pbar: - for param_name, param in state_dict.items(): - if param_name not in quantized_param_names: - param = param.to(get_torch_data_type(self.converter.data_type)) - yield param_name, param - pbar.update() diff --git a/friendli/modules/quantizer/layers.py b/friendli/modules/quantizer/layers.py deleted file mode 100644 index 31d104b1..00000000 --- a/friendli/modules/quantizer/layers.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Quantization Layers.""" - -from __future__ import annotations - -from typing import Optional, cast - -import torch - -from friendli.modules.quantizer.schema.data import ( - CommonQuantResult, - WeightActQuantResult, - WeightOnlyQuantResult, -) - - -class WeightOnlyQuantizedLinearLayer(torch.nn.Module): - """Linear Layer with weight only quantization.""" - - def __init__( - self, - in_features: int, - out_features: int, - q_weight: torch.Tensor, - weight_scale: torch.Tensor, - zeros: torch.Tensor, - bias: Optional[torch.nn.Parameter] = None, - ): - """Initialize the Weight Only Quantized Linear Layer.""" - super().__init__() - self.in_features = in_features - self.out_features = out_features - self.weight_scale = torch.nn.Parameter(weight_scale) - self.zeros = torch.nn.Parameter(zeros, requires_grad=False) - self.weight = torch.nn.Parameter(q_weight, requires_grad=False) - self.register_parameter("bias", bias) - - @staticmethod - def from_layer( - layer: torch.nn.Module, quant_result: CommonQuantResult - ) -> torch.nn.Module: - """Returns the quantized layer from the original layer.""" - q_result = cast(WeightOnlyQuantResult, quant_result) - return WeightOnlyQuantizedLinearLayer( - cast(torch.nn.Linear, layer).in_features, - cast(torch.nn.Linear, layer).out_features, - q_result.q_weight, - q_result.weight_scale, - q_result.zero_point, - cast(torch.nn.Linear, layer).bias, - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward pass with fake quantization. 
Not used in conversion.""" - raise NotImplementedError("Not used in conversion.") - - -class WeightActQuantizedLinearLayer(torch.nn.Module): - """Linear Layer with weight-act quantization.""" - - def __init__( # pylint: disable=too-many-arguments - self, - q_weight: torch.Tensor, - weight_scale: torch.Tensor, - act_scale: torch.Tensor, - bias: Optional[torch.nn.Parameter] = None, - ): - """Initialize the Weight Only Quantized Linear Layer.""" - super().__init__() - self.in_scale = torch.nn.Parameter(act_scale) - self.weight_scale = torch.nn.Parameter(weight_scale) - self.weight = torch.nn.Parameter(q_weight, requires_grad=False) - self.register_parameter("bias", bias) - - @staticmethod - def from_layer( - layer: torch.nn.Module, quant_result: CommonQuantResult - ) -> torch.nn.Module: - """Returns the quantized layer from the original layer.""" - q_result = cast(WeightActQuantResult, quant_result) - return WeightActQuantizedLinearLayer( - q_result.q_weight, - q_result.weight_scale, - q_result.act_scale, - cast(torch.nn.Linear, layer).bias if hasattr(layer, "bias") else None, - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward pass with fake quantization. Not used in conversion.""" - raise NotImplementedError("Not used in conversion.") diff --git a/friendli/modules/quantizer/maps.py b/friendli/modules/quantizer/maps.py deleted file mode 100644 index 465d5c3e..00000000 --- a/friendli/modules/quantizer/maps.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Quantizer Maps.""" - -from __future__ import annotations - -from typing import Any, Dict, Type - -from friendli.enums import QuantMode -from friendli.errors import NotSupportedQuantModeError -from friendli.modules.converter.base import OneOfConverter -from friendli.modules.converter.utils import get_model_arch -from friendli.modules.quantizer.awq.base import AWQHook, AWQQuantizer -from friendli.modules.quantizer.awq.models.gpt_neox import AWQGPTNeoXHook -from friendli.modules.quantizer.awq.models.gptj import AWQGPTJHook -from friendli.modules.quantizer.awq.models.llama import AWQLlamaHook -from friendli.modules.quantizer.awq.models.mpt import AWQMPTHook -from friendli.modules.quantizer.base import CommonQuantizer, FP8QuantHook, FP8Quantizer -from friendli.modules.quantizer.models.arctic import ArcticHook -from friendli.modules.quantizer.models.dbrx import DbrxHook -from friendli.modules.quantizer.models.llama import LlamaHook -from friendli.modules.quantizer.models.mixtral import MixtralHook -from friendli.modules.quantizer.models.mpt import MPTHook -from friendli.modules.quantizer.models.phi3 import Phi3Hook -from friendli.modules.quantizer.schema.config import OneOfQuantConfig -from friendli.modules.quantizer.smoothquant.base import ( - SmoothQuantHook, - SmoothQuantQuantizer, -) -from friendli.modules.quantizer.smoothquant.models.bloom import SmoothQuantBloomHook -from friendli.modules.quantizer.smoothquant.models.codegen import SmoothQuantCodeGenHook -from friendli.modules.quantizer.smoothquant.models.falcon import SmoothQuantFalconHook -from friendli.modules.quantizer.smoothquant.models.gpt2 import SmoothQuantGPT2Hook -from friendli.modules.quantizer.smoothquant.models.gpt_neox import ( - SmoothQuantGPTNeoXHook, -) -from friendli.modules.quantizer.smoothquant.models.gptj import SmoothQuantGPTJHook -from friendli.modules.quantizer.smoothquant.models.llama import SmoothQuantLlamaHook -from friendli.modules.quantizer.smoothquant.models.mpt import 
SmoothQuantMPTHook -from friendli.modules.quantizer.smoothquant.models.opt import SmoothQuantOPTHook - -model_arch_smoothquant_hook_map: Dict[str, type[SmoothQuantHook]] = { - "OPTForCausalLM": SmoothQuantOPTHook, - "MPTForCausalLM": SmoothQuantMPTHook, - "BloomForCausalLM": SmoothQuantBloomHook, - "CodeGenForCausalLM": SmoothQuantCodeGenHook, - "GPTNeoXForCausalLM": SmoothQuantGPTNeoXHook, - "GPTJForCausalLM": SmoothQuantGPTJHook, - "GPT2LMHeadModel": SmoothQuantGPT2Hook, - "FalconForCausalLM": SmoothQuantFalconHook, - "LlamaForCausalLM": SmoothQuantLlamaHook, -} - -model_arch_awq_hook_map: Dict[str, type[AWQHook]] = { - "GPTJForCausalLM": AWQGPTJHook, - "GPTNeoXForCausalLM": AWQGPTNeoXHook, - "LlamaForCausalLM": AWQLlamaHook, - "MPTForCausalLM": AWQMPTHook, - "MistralForCausalLM": AWQLlamaHook, -} - -model_arch_fp8_hook_map: Dict[str, type[FP8QuantHook]] = { - "LlamaForCausalLM": LlamaHook, - "MistralForCausalLM": LlamaHook, - "MixtralForCausalLM": MixtralHook, - "MPTForCausalLM": MPTHook, - "CohereForCausalLM": LlamaHook, - "DbrxForCausalLM": DbrxHook, - "Phi3ForCausalLM": Phi3Hook, - "ArcticForCausalLM": ArcticHook, -} - - -def get_quanthook_map(quant_mode: QuantMode) -> Dict[str, Any]: - """Get quantizer map.""" - if quant_mode == QuantMode.SMOOTH_QUANT: - return model_arch_smoothquant_hook_map - if quant_mode == QuantMode.AWQ: - return model_arch_awq_hook_map - if quant_mode == QuantMode.FP8: - return model_arch_fp8_hook_map - raise NotSupportedQuantModeError( - invalid_option=quant_mode, - valid_options=[e.value for e in QuantMode], - ) - - -def get_quantizer_class(quant_mode: QuantMode) -> Type[CommonQuantizer]: - """Get quantizer class.""" - if quant_mode == QuantMode.SMOOTH_QUANT: - return SmoothQuantQuantizer - if quant_mode == QuantMode.AWQ: - return AWQQuantizer - if quant_mode == QuantMode.FP8: - return FP8Quantizer - raise NotSupportedQuantModeError( - invalid_option=quant_mode, - valid_options=[e.value for e in QuantMode], - ) - - -def get_quantized_converter( - quant_config: OneOfQuantConfig, - converter: OneOfConverter, -) -> CommonQuantizer: - """Get quantizer for specific model architecture with quant mode and args.""" - model_arch = get_model_arch(converter.config) - quant_mode = quant_config.mode - quantizer = get_quantizer_class(quant_mode) - quanthook_map = get_quanthook_map(quant_mode) - quanthook = quanthook_map[model_arch](quant_config, converter) - return quantizer(quanthook, quant_config, converter) diff --git a/friendli/modules/quantizer/models/arctic.py b/friendli/modules/quantizer/models/arctic.py deleted file mode 100644 index cc7d3fd9..00000000 --- a/friendli/modules/quantizer/models/arctic.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
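The lookup tables in the removed `maps.py` resolve a quantizer in two steps: the quant mode picks a quantizer class and a hook map, and the model architecture string picks the concrete hook. A minimal sketch of that wiring, assuming the pre-removal `friendli` package layout (it mirrors `get_quantized_converter` rather than replacing it):

```python
from friendli.modules.quantizer.maps import get_quanthook_map, get_quantizer_class


def build_quantizer(quant_config, converter, model_arch: str):
    """Resolve the quantizer for one architecture, e.g. "LlamaForCausalLM"."""
    quantizer_cls = get_quantizer_class(quant_config.mode)  # FP8Quantizer, AWQQuantizer, ...
    hook_map = get_quanthook_map(quant_config.mode)
    try:
        hook_cls = hook_map[model_arch]
    except KeyError as err:
        raise ValueError(f"{model_arch} is not supported for {quant_config.mode}") from err
    hook = hook_cls(quant_config, converter)
    return quantizer_cls(hook, quant_config, converter)
```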
- -"""Friendli ArcticForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type - -import torch - -from friendli.modules.quantizer.base import FP8QuantHook -from friendli.modules.quantizer.schema.data import ( - HFQuantInput, - HFTFQuantInputs, - TFQuantInputs, -) - - -class ArcticHook(FP8QuantHook): - """FP8QuantHook for ArcticForCausalLM.""" - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in ArcticForCausalLM.""" - return model.model.layers - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in ArcticForCausalLM.""" - return (torch.nn.Linear,) - - def iter_tf_quant_inputs( - self, model: torch.nn.Module - ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]: - """Returns the layers which should be quantized in transformer block of ArcticForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.self_attn - block_sparse_moe = decoder_layer.block_sparse_moe - mlp = decoder_layer.residual_mlp - moe_ff1_ff_gate_target_names = [] - for expert_idx in range(self.converter.num_experts): - moe_ff1_ff_gate_target_names.extend( - [ - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w1", - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w3", - ] - ) - - yield HFTFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", - f"{self.quantized_layer_prefix}{index}.self_attn.k_proj", - f"{self.quantized_layer_prefix}{index}.self_attn.v_proj", - ], - local_names=["q_proj", "k_proj", "v_proj"], - ), - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - ], - local_names=[ - "o_proj", - ], - ), - # router - HFQuantInput( - parent_module=block_sparse_moe, - target_names=[ - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.gate", - ], - local_names=["gate"], - ), - # ff1, ff_gate in each moe - HFQuantInput( - parent_module=block_sparse_moe.experts, - target_names=moe_ff1_ff_gate_target_names, - local_names=["w1", "w3"], - ), - # ff2 in each moe - HFQuantInput( - parent_module=block_sparse_moe.experts, - target_names=[ - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w2" - for expert_idx in range(self.converter.num_experts) - ], - local_names=["w2"], - ), - # ff1, ff_gate in parallel mlp - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.residual_mlp.w1", - f"{self.quantized_layer_prefix}{index}.residual_mlp.w3", - ], - local_names=["w1", "w3"], - ), - # ff2 in parallel mlp - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.residual_mlp.w2" - ], - local_names=["w2"], - ), - ], - ) diff --git a/friendli/modules/quantizer/models/dbrx.py b/friendli/modules/quantizer/models/dbrx.py deleted file mode 100644 index f4e3232a..00000000 --- a/friendli/modules/quantizer/models/dbrx.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli DbrxForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Dict, Iterator, List, Tuple, Type, cast - -import torch -from torch.nn.modules import Module -from tqdm import tqdm -from transformers.models.dbrx.modeling_dbrx import DbrxBlock, DbrxConfig, DbrxExpertGLU - -from friendli.modules.quantizer.base import FP8QuantHook -from friendli.modules.quantizer.schema.data import ( - HFQuantInput, - HFTFQuantInputs, - TFQuantInputs, -) - - -class DbrxLinearLayer(torch.nn.Module): - """Custom FF2Proj layer for DbrxForCausalLM.""" - - def __init__(self, weight: torch.nn.Parameter): - """Initialize the DbrxLinearLayer.""" - super().__init__() - self.weight = weight - - def forward(self, x: torch.Tensor, chunked_weight: torch.Tensor) -> torch.Tensor: - """Forward pass for the DbrxLinearLayer.""" - return x.matmul(chunked_weight) - - -class CustomDbrxExpertGLU(DbrxExpertGLU): - """Custom DbrxExpertGLU layer for DbrxForCausalLM. - - This layer is used to replace the DbrxExpertGLU layer in DbrxForCausalLM. - For collecting input of the ff2 layer in each experts, we need to override the forward method. - """ - - def __init__(self, layer: DbrxExpertGLU, ffn_act_fn: Dict): - """Initialize the CustomDbrxExpertGLU.""" - super().__init__( - layer.hidden_size, layer.ffn_hidden_size, layer.moe_num_experts, ffn_act_fn - ) - - self.v1_linear = DbrxLinearLayer(layer.v1.detach()) - self.w1_linear = DbrxLinearLayer(layer.w1.detach()) - self.w2_linear = DbrxLinearLayer(layer.w2.detach()) - - def forward( - self, - x: torch.Tensor, - expert_w1: torch.Tensor, - expert_v1: torch.Tensor, - expert_w2: torch.Tensor, - ) -> torch.Tensor: - """Forward pass for the CustomDbrxExpertGLU.""" - gate_proj = self.w1_linear(x, expert_w1.t()) - up_proj = self.v1_linear(x, expert_v1.t()) - gate_proj = self.activation_fn(gate_proj) - intermediate_states = gate_proj * up_proj - down_proj = self.w2_linear(intermediate_states, expert_w2) - return down_proj - - @staticmethod - def from_layer(layer: DbrxExpertGLU, config: DbrxConfig) -> CustomDbrxExpertGLU: - """Creates a CustomDbrxExpertGLU layer from a DbrxExpertGLU layer.""" - custom_layer = CustomDbrxExpertGLU(layer, config.ffn_config.ffn_act_fn) - custom_layer.v1 = layer.v1 - custom_layer.w1 = layer.w1 - custom_layer.w2 = layer.w2 - return custom_layer - - -class DbrxHook(FP8QuantHook): - """FP8QuantHook for DbrxForCausalLM.""" - - def get_quantized_param_names(self, model: torch.nn.Module) -> List[str]: - """Return the parameter names of quantized layers.""" - quantized_param_names = [] - for index in range( - len(self.get_tf_blocks(model)) # type: ignore[union-attr, arg-type] - ): - quantized_param_names.extend( - [ - f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv.weight", - f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj.weight", - f"{self.quantized_layer_prefix}{index}.ffn.router.layer.weight", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2", - ] - ) - return quantized_param_names - - def get_quantized_param_scale_names(self, model: torch.nn.Module) -> List[str]: - """Return the parameter scale names of quantized layers.""" - quantized_param_scale_names = [] - for index in range( - len(self.get_tf_blocks(model)) # type: ignore[union-attr, arg-type] - ): - quantized_param_scale_names.extend( - [ - 
f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv.weight_scale", - f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj.weight_scale", - f"{self.quantized_layer_prefix}{index}.ffn.router.layer.weight_scale", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1_weight_scale", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1_weight_scale", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2_weight_scale", - ] - ) - quantized_param_scale_names.extend( - [ - f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv.in_scale", - f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj.in_scale", - f"{self.quantized_layer_prefix}{index}.ffn.router.layer.in_scale", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1_in_scale", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1_in_scale", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2_in_scale", - ] - ) - return quantized_param_scale_names - - def pre_quantize(self, model: Module) -> torch.nn.Module: - """Pre-quantization hook for DbrxForCausalLM.""" - for decoder_layer in tqdm( - self.get_tf_blocks(model), - desc="Pre-quantizing DbrxForCausalLM", - unit="layer", - ): - cast( - DbrxBlock, decoder_layer - ).ffn.experts.mlp = CustomDbrxExpertGLU.from_layer( - cast(DbrxBlock, decoder_layer).ffn.experts.mlp, self.converter.config - ) - return model - - def post_quantize(self, model: Module) -> torch.nn.Module: - """Post-quantization hook for DbrxForCausalLM.""" - for decoder_layer in tqdm( - self.get_tf_blocks(model), - desc="Post-quantizing DbrxForCausalLM", - unit="layer", - ): - mlp = cast(DbrxBlock, decoder_layer).ffn.experts.mlp - - # ff1 - setattr(mlp, "v1_in_scale", mlp.v1_linear.in_scale) - setattr(mlp, "v1_weight_scale", mlp.v1_linear.weight_scale) - mlp.v1 = mlp.v1_linear.weight - del mlp.v1_linear - - # ff_gate - setattr(mlp, "w1_in_scale", mlp.w1_linear.in_scale) - setattr(mlp, "w1_weight_scale", mlp.w1_linear.weight_scale) - mlp.w1 = mlp.w1_linear.weight - del mlp.w1_linear - - # ff2 - setattr(mlp, "w2_in_scale", mlp.w2_linear.in_scale) - setattr(mlp, "w2_weight_scale", mlp.w2_linear.weight_scale) - mlp.w2 = mlp.w2_linear.weight - del mlp.w2_linear - return model - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in DbrxForCausalLM.""" - return model.transformer.blocks - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in DbrxForCausalLM.""" - return ( - torch.nn.Linear, - DbrxLinearLayer, - ) - - def iter_tf_quant_inputs( - self, model: torch.nn.Module - ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]: - """Returns the layers which should be quantized in transformer block of DbrxForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = cast(DbrxBlock, decoder_layer).norm_attn_norm.attn - mlp = cast(DbrxBlock, decoder_layer).ffn.experts.mlp - - yield HFTFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv", - ], - local_names=["Wqkv"], - ), - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj", - ], - local_names=[ - "out_proj", - ], - ), - HFQuantInput( - parent_module=cast(DbrxBlock, decoder_layer).ffn.router, 
- target_names=[ - f"{self.quantized_layer_prefix}{index}.ffn.router.layer", - ], - local_names=["layer"], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1_linear", - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1_linear", - ], - local_names=["w1_linear", "v1_linear"], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2_linear" - ], - local_names=["w2_linear"], - ), - ], - ) diff --git a/friendli/modules/quantizer/models/llama.py b/friendli/modules/quantizer/models/llama.py deleted file mode 100644 index d4002955..00000000 --- a/friendli/modules/quantizer/models/llama.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli LlamaForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Iterator, List, Tuple, Type - -import torch - -from friendli.modules.quantizer.base import FP8QuantHook -from friendli.modules.quantizer.schema.data import ( - HFQuantInput, - HFTFQuantInputs, - TFQuantInputs, -) - - -class LlamaHook(FP8QuantHook): - """FP8QuantHook for LlamaForCausalLM.""" - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in LlamaForCausalLM.""" - return model.model.layers - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in LlamaForCausalLM.""" - return (torch.nn.Linear,) - - def iter_tf_quant_inputs( - self, model: torch.nn.Module - ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]: - """Returns the layers which should be quantized in transformer block of LlamaForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.self_attn - mlp = decoder_layer.mlp - - yield HFTFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", - f"{self.quantized_layer_prefix}{index}.self_attn.k_proj", - f"{self.quantized_layer_prefix}{index}.self_attn.v_proj", - ], - local_names=["q_proj", "k_proj", "v_proj"], - ), - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - ], - local_names=[ - "o_proj", - ], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.up_proj", - f"{self.quantized_layer_prefix}{index}.mlp.gate_proj", - ], - local_names=["up_proj", "gate_proj"], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.down_proj" - ], - local_names=["down_proj"], - ), - ], - ) diff --git a/friendli/modules/quantizer/models/mixtral.py b/friendli/modules/quantizer/models/mixtral.py deleted file mode 100644 index 70abc34b..00000000 --- a/friendli/modules/quantizer/models/mixtral.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
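Each group yielded by hooks such as `LlamaHook` is then quantized with the per-tensor FP8 scaling from `_get_weight_act_quantize_results`. A standalone sketch of that arithmetic, assuming a PyTorch build with `torch.float8_e4m3fn` (2.1 or later); 448 is the E4M3 dynamic range:

```python
import torch

FP8_E4M3_MAX = 448.0


def fp8_scales(weight: torch.Tensor, input_abs_max: torch.Tensor):
    """Per-tensor scales and FP8 weight, mirroring the logic described above."""
    weight_scale = weight.detach().abs().max().float() / FP8_E4M3_MAX
    act_scale = input_abs_max.detach().abs().max().float() / FP8_E4M3_MAX
    q_weight = (
        (weight.detach().float() / weight_scale)
        .clamp(-FP8_E4M3_MAX, FP8_E4M3_MAX)
        .to(torch.float8_e4m3fn)
        .view(torch.int8)  # stored viewed as int8, as in the original code
    )
    return q_weight, weight_scale, act_scale


w = torch.randn(32, 16)
x_abs_max = torch.rand(16) * 10  # calibration statistic: per-channel |input| max
qw, w_scale, a_scale = fp8_scales(w, x_abs_max)
```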
- -"""Friendli MixtralForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List - -import torch - -from friendli.modules.quantizer.models.llama import LlamaHook -from friendli.modules.quantizer.schema.data import ( - HFQuantInput, - HFTFQuantInputs, - TFQuantInputs, -) - - -class MixtralHook(LlamaHook): - """FP8QuantHook for MixtralForCausalLM.""" - - def iter_tf_quant_inputs( - self, model: torch.nn.Module - ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]: - """Returns the layers which should be quantized in transformer block of MixtralForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.self_attn - block_sparse_moe = decoder_layer.block_sparse_moe - moe_ff1_ff_gate_target_names = [] - for expert_idx in range(self.converter.num_experts): - moe_ff1_ff_gate_target_names.extend( - [ - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w1", - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w3", - ] - ) - - yield HFTFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", - f"{self.quantized_layer_prefix}{index}.self_attn.k_proj", - f"{self.quantized_layer_prefix}{index}.self_attn.v_proj", - ], - local_names=["q_proj", "k_proj", "v_proj"], - ), - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - ], - local_names=[ - "o_proj", - ], - ), - # router - HFQuantInput( - parent_module=block_sparse_moe, - target_names=[ - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.gate", - ], - local_names=["gate"], - ), - # ff1, ff_gate in each moe - HFQuantInput( - parent_module=block_sparse_moe.experts, - target_names=moe_ff1_ff_gate_target_names, - local_names=["w1", "w3"], - ), - # ff2 in each moe - HFQuantInput( - parent_module=block_sparse_moe.experts, - target_names=[ - f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w2" - for expert_idx in range(self.converter.num_experts) - ], - local_names=["w2"], - ), - ], - ) diff --git a/friendli/modules/quantizer/models/mpt.py b/friendli/modules/quantizer/models/mpt.py deleted file mode 100644 index 39a17ff1..00000000 --- a/friendli/modules/quantizer/models/mpt.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli MPTForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type - -import torch - -from friendli.modules.quantizer.base import FP8QuantHook -from friendli.modules.quantizer.schema.data import ( - HFQuantInput, - HFTFQuantInputs, - TFQuantInputs, -) - - -class MPTHook(FP8QuantHook): - """FP8QuantHook for MPTForCausalLM.""" - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in MPTForCausalLM.""" - return model.transformer.blocks - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in MPTForCausalLM.""" - return (torch.nn.Linear,) - - def iter_tf_quant_inputs( - self, model: torch.nn.Module - ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]: - """Returns the layers which should be quantized in transformer block of MPTForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.attn - mlp = decoder_layer.ffn - - yield HFTFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - ], - local_names=["Wqkv"], - ), - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - ], - local_names=[ - "out_proj", - ], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.ffn.up_proj", - ], - local_names=["up_proj"], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.ffn.down_proj" - ], - local_names=["down_proj"], - ), - ], - ) diff --git a/friendli/modules/quantizer/models/phi3.py b/friendli/modules/quantizer/models/phi3.py deleted file mode 100644 index 4d4d15cb..00000000 --- a/friendli/modules/quantizer/models/phi3.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Phi3ForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type - -import torch - -from friendli.modules.quantizer.base import FP8QuantHook -from friendli.modules.quantizer.schema.data import ( - HFQuantInput, - HFTFQuantInputs, - TFQuantInputs, -) - - -class Phi3Hook(FP8QuantHook): - """FP8QuantHook for Phi3ForCausalLM.""" - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the transformer blocks in Phi3ForCausalLM.""" - return model.model.layers - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in Phi3ForCausalLM.""" - return (torch.nn.Linear,) - - def iter_tf_quant_inputs( - self, model: torch.nn.Module - ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]: - """Returns the layers which should be quantized in transformer block of Phi3ForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.self_attn - mlp = decoder_layer.mlp - - yield HFTFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.qkv_proj", - ], - local_names=["qkv_proj"], - ), - HFQuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - ], - local_names=[ - "o_proj", - ], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.gate_up_proj", - ], - local_names=["gate_up_proj"], - ), - HFQuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.down_proj" - ], - local_names=["down_proj"], - ), - ], - ) diff --git a/friendli/modules/quantizer/schema/__init__.py b/friendli/modules/quantizer/schema/__init__.py deleted file mode 100644 index f5d8dd04..00000000 --- a/friendli/modules/quantizer/schema/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantizer Schema.""" diff --git a/friendli/modules/quantizer/schema/config.py b/friendli/modules/quantizer/schema/config.py deleted file mode 100644 index 2ca36f7b..00000000 --- a/friendli/modules/quantizer/schema/config.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantizer Config Schema.""" - -from __future__ import annotations - -from typing import Literal, Union - -from pydantic import BaseModel, Field -from typing_extensions import Annotated - -from friendli.enums import ModelDataType, QuantDatasetFormat, QuantMode - - -class CalibrationDatasetConfig(BaseModel): - """Calibration dataset config.""" - - path_or_name: str = "cnn_dailymail:3.0.0" - format: QuantDatasetFormat = QuantDatasetFormat.JSON - split: str = "validation" - lookup_column_name: str = "article" - num_samples: int = 512 - max_length: int = 512 - - -class AbstractQuantConfig(BaseModel): - """Abstract quantization config.""" - - mode: QuantMode - device: str = "cuda:0" - offload: bool = True - seed: int = 42 - percentile: float = 100.0 - quant_dtype: ModelDataType = ModelDataType.INT8 - calibration_dataset: CalibrationDatasetConfig = Field( - default_factory=CalibrationDatasetConfig - ) - - -class FP8QuantConfig(AbstractQuantConfig): - """FP8 quantization config. 
- - The data type of parameters are converted to the one specified at `quant_dtype` - by using calibration dataset. The quantization scale for weight and activation is - added to converted checkpoint. - - """ - - mode: Literal[QuantMode.FP8] = QuantMode.FP8 - - -class SmoothQuantArgs(BaseModel): - """SmoothQuant args.""" - - migration_strength: float = 0.5 - attn_fc_smoothing: bool = False - ff2_smoothing: bool = False - - -class SmoothQuantConfig(AbstractQuantConfig): - """SmoothQuant config.""" - - mode: Literal[QuantMode.SMOOTH_QUANT] = QuantMode.SMOOTH_QUANT - smoothquant_args: SmoothQuantArgs = Field(default_factory=SmoothQuantArgs) - - -class AWQArgs(BaseModel): - """AWQ args.""" - - quant_dtype: ModelDataType = ModelDataType.INT4 - quant_bit: int = 4 - quant_group_size: int = 64 - - -class AWQConfig(AbstractQuantConfig): - """AWQ config.""" - - mode: Literal[QuantMode.AWQ] = QuantMode.AWQ - awq_args: AWQArgs = Field(default_factory=AWQArgs) - - -OneOfQuantConfig = Annotated[ - Union[SmoothQuantConfig, AWQConfig, FP8QuantConfig], Field(discriminator="mode") -] - - -class QuantConfig(BaseModel): - """Quantization config.""" - - config: OneOfQuantConfig diff --git a/friendli/modules/quantizer/schema/data.py b/friendli/modules/quantizer/schema/data.py deleted file mode 100644 index ae472126..00000000 --- a/friendli/modules/quantizer/schema/data.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantizer Data Schema.""" -from __future__ import annotations - -from dataclasses import dataclass -from typing import Callable, List, Optional - -import torch - -from friendli.enums import ModelDataType - -ModuleName = str - - -@dataclass -class CommonQuantResult: - """Dataclass for quantization result per layer.""" - - module_name: str - quant_dtype: ModelDataType - q_group_size: int - zero_point: torch.Tensor - - -@dataclass -class WeightOnlyQuantResult(CommonQuantResult): - """Dataclass for weight-only quantization result per layer.""" - - weight_scale: torch.Tensor - q_weight: torch.Tensor - - -@dataclass -class WeightActQuantResult(WeightOnlyQuantResult): - """Dataclass for weight-activation quantization result per layer.""" - - act_scale: torch.Tensor - zero_point: torch.Tensor - q_group_size: int - - -@dataclass -class QuantInput: - """Dataclass for int8 quantization input of each layer in transformer block.""" - - weight: torch.Tensor # [OutDim, InDim] - name: ModuleName - start_offset: Optional[int] # start offset of the weight tensor along the out_dim - end_offset: Optional[int] # end offset of the weight tensor along the out_dim - sort_fn: Optional[ - Callable[[torch.Tensor], torch.Tensor] - ] = None # sort function for max_output_stats - - -@dataclass -class HFQuantInput: - """Dataclass for quantization input of each layer in transformer block. - - Attributes: - parent_module: module contains target layers. - target_names: list of target module's full name - (ex. model.model.layers.0.self_attn.q_proj, ) - local_names: list of target module's name using when access from parent_module - (ex. 
q_proj, k_proj, v_proj ) - """ - - parent_module: torch.nn.Module - target_names: List[ModuleName] - local_names: str - - -@dataclass -class HFTFQuantInputs: - """Dataclass for quantization input per transformer block.""" - - layer_index: int - block: torch.nn.Module - quant_inputs: List[HFQuantInput] - - -@dataclass -class TFQuantInputs: # pylint: disable=too-many-instance-attributes - """Dataclass for int8 quantization input per transformer block.""" - - layer_index: int - block: torch.nn.Module - q: QuantInput - k: QuantInput - v: QuantInput - attn_fc: QuantInput - ff1: QuantInput - ff2: QuantInput - - -@dataclass -class TFQuantResults: # pylint: disable=too-many-instance-attributes - """Dataclass for int8 quantization result per a transformer block.""" - - layer_prefix_with_index: str - block: torch.nn.Module - q: CommonQuantResult - k: CommonQuantResult - v: CommonQuantResult - attn_fc: CommonQuantResult - ff1: CommonQuantResult - ff2: CommonQuantResult diff --git a/friendli/modules/quantizer/smoothquant/__init__.py b/friendli/modules/quantizer/smoothquant/__init__.py deleted file mode 100644 index 5205fe18..00000000 --- a/friendli/modules/quantizer/smoothquant/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model SmoothQuant Quantizer.""" diff --git a/friendli/modules/quantizer/smoothquant/base.py b/friendli/modules/quantizer/smoothquant/base.py deleted file mode 100644 index 8ee4e1a7..00000000 --- a/friendli/modules/quantizer/smoothquant/base.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli SmoothQuant Quantizer Base.""" - -from __future__ import annotations - -from abc import abstractmethod -from dataclasses import fields -from typing import Any, Dict, Iterator, List, Tuple, cast - -import datasets # type: ignore[import] -import torch - -from friendli.enums import ModelDataType -from friendli.errors import NotSupportedQuantConfigError -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.interface import ModelConversionInterface -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.converter.utils import get_tokenizer -from friendli.modules.quantizer.base import AbstractQuantHook, CommonQuantizer -from friendli.modules.quantizer.layers import WeightActQuantizedLinearLayer -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ( - ModuleName, - QuantInput, - TFQuantInputs, - TFQuantResults, - WeightActQuantResult, -) -from friendli.modules.quantizer.utils import ( - collect_stats, - get_weight_act_quant_scales, - quantized_linear_weight_reshape, - quantized_qkv_weight_reshape, - safe_load_datasets, - scale_reshape, -) - - -class PreSmoother(torch.nn.Module): - """Module for containing smoothing scale. - - This module is used to contain the smoothing scale for the quantization. - If the matmul layer have previous layer, the smoothing scale can be migrated - to the previous layer. But, if the matmul layer is the first layer, the scale - need to be stored in this module. Especially, When MLP ff2 layer with previous activation - layer that prevent migrating the scale to the previous layer needs SmoothQuant, then, - this module is used to store the smoothing scale. [SmoothQunat Issue #15] - (https://github.com/mit-han-lab/smoothquant/issues/15#issuecomment-1353390283). 
- - Args: - in_dim (float): input dimension of the matmul layer's weight dimension. - """ - - def __init__(self, in_dim: int): - """Initialize PreSmoother.""" - super().__init__() - self.scale = torch.nn.Parameter(torch.ones(in_dim, dtype=torch.float32)) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward function of PreSmoother.""" - return (x * self.scale).to(x.dtype) - - -class SmoothQuantHook(AbstractQuantHook): - """Quantization Hook for SmoothQuant.""" - - @abstractmethod - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the attention fc layer in the decoder block.""" - - @abstractmethod - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the second feed-forward layer in the decoder block.""" - - @abstractmethod - def iter_smooth_norm_weights( - self, model: torch.nn.Module - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm and linear layer's weight per transformer block.""" - - def _register_pre_smoother(self, linear: torch.nn.Linear) -> PreSmoother: - """Register pre_smoother storing smoothing scale of linear layer.""" - pre_smoother = PreSmoother(linear.in_features).to(device=linear.weight.device) - - def pre_smoother_hook(_, x: Tuple[Any, ...]) -> Tuple[torch.Tensor, ...]: - return (pre_smoother.forward(x[0]),) - - linear.register_forward_pre_hook(pre_smoother_hook) - return pre_smoother - - def pre_smooth( - self, - model: torch.nn.Module, - ) -> torch.nn.Module: - """Pre-procedure for SmoothQuant before Smoothing.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - for decoder_layer in self.get_tf_blocks(model): - if quant_args.attn_fc_smoothing: - attn_fc_pre_smoother = self._register_pre_smoother( - self.get_attn_fc_layer(decoder_layer) - ) - decoder_layer.add_module("attn_fc_pre_smoother", attn_fc_pre_smoother) - if quant_args.ff2_smoothing: - ff2_pre_smoother = self._register_pre_smoother( - self.get_ff2_layer(decoder_layer) - ) - decoder_layer.add_module("ff2_pre_smoother", ff2_pre_smoother) - return model - - def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor: - """Sort max_output_stas for seperating qkv_layer's output_stats.""" - return max_output_stat - - def copy_norms(self, model: torch.nn.Module) -> torch.nn.Module: - """Copy and Register norms in transformer block for seperated scaling. - - In some models(e.g. llama, gptj, codegen), matmul layers share activations - from the same norms. Therefore, we need to copy and register the norms for - seperated smoothing scale. For example, in llama, normalization layer is - shared with gate linear layer and attention linear layer. Thus, we need to - copy and register the norms for each linear layer and use them for smoothing. - """ - return model - - def get_quant_result( - self, - quant_inputs: TFQuantInputs, - **kwargs: Any, - ) -> TFQuantResults: - """Returns the quantization result of the quantized layer. - - If the model has another quantized layer, it should be implemented in the subclass. 
- - """ - max_input_stats: Dict[ModuleName, torch.Tensor] = kwargs["max_input_stats"] - max_output_stats: Dict[ModuleName, torch.Tensor] = kwargs["max_output_stats"] - - def get_scale( - quant_input: QuantInput, - ) -> WeightActQuantResult: - weight, name, start, end, sort_fn = ( - quant_input.weight, - quant_input.name, - quant_input.start_offset, - quant_input.end_offset, - quant_input.sort_fn, - ) - - return get_weight_act_quant_scales( - name, - max_input_stats[name], - weight[start:end], - weight[start:end], - sort_fn(max_output_stats[name])[start:end] - if sort_fn - else max_output_stats[name][start:end], - ) - - return TFQuantResults( - layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_inputs.layer_index}.", - block=quant_inputs.block, - q=get_scale(quant_inputs.q), - k=get_scale(quant_inputs.k), - v=get_scale(quant_inputs.v), - attn_fc=get_scale(quant_inputs.attn_fc), - ff1=get_scale(quant_inputs.ff1), - ff2=get_scale(quant_inputs.ff2), - ) - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for modified modules. - - This convert_info_list is used for modules that are modified for quantization. - Especially, for attention fc layer and MLP ff2 layer, we need to migrate - smooth scale to the previous layer. Thus, we add the smoothing scaler, and - modify the convert_info_list for the modified modules. - - In some models, matmul layers share activations from the same norms. Therefore, - we use `copy_norms()` to copy and register the norms for seperated smoothing scale. - Thus, we modify the convert_info_list for the modified modules. - """ - sq_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - new_layer_convert_info_list = [] - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - - if sq_args.attn_fc_smoothing: - new_layer_convert_info_list.append( - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc_pre_smoother.scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_proj/smoothquant/smoothing_vector:0", # pylint: disable=line-too-long - reshape_fn=scale_reshape, - ) - ) - if sq_args.ff2_smoothing: - new_layer_convert_info_list.append( - ConvertInfo( - param_names=[f"{layer_prefix}ff2_pre_smoother.scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/smoothing_vector:0", # pylint: disable=line-too-long - reshape_fn=scale_reshape, - ) - ) - - return new_layer_convert_info_list - - @property - def quantized_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Return the list of conversion informations for quantized layers.""" - convert_info_list = [] - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}q.weight_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/q_weight_scale:0", # pylint: disable=line-too-long - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}k.weight_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/k_weight_scale:0", # pylint: disable=line-too-long - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}v.weight_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/v_weight_scale:0", # pylint: disable=line-too-long - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}q.out_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/q_out_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}k.out_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/k_out_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}v.out_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/v_out_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}q.in_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/in_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc.weight_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_proj/smoothquant/weight_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc.out_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_proj/smoothquant/out_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc.in_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}attn/c_proj/smoothquant/in_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff1.weight_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/weight_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff1.out_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/out_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff1.in_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/in_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff2.weight_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/weight_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff2.out_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/out_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff2.in_scale"], - data_type=ModelDataType.FP32, - converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/in_scale:0", - reshape_fn=scale_reshape, - ), - ConvertInfo( - param_names=[ - f"{layer_prefix}q.weight", - f"{layer_prefix}k.weight", - 
f"{layer_prefix}v.weight", - ], - data_type=ModelDataType.INT8, - converted_name=f"{converted_prefix}attn/c_attn/smoothquant/weight:0", - reshape_fn=quantized_qkv_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}attn_fc.weight"], - data_type=ModelDataType.INT8, - converted_name=f"{converted_prefix}attn/c_proj/smoothquant/weight:0", - reshape_fn=quantized_linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff1.weight"], - data_type=ModelDataType.INT8, - converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/weight:0", - reshape_fn=quantized_linear_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ff2.weight"], - data_type=ModelDataType.INT8, - converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/weight:0", - reshape_fn=quantized_linear_weight_reshape, - ), - ] - ) - return convert_info_list - - -class SmoothQuantQuantizer(CommonQuantizer, ModelConversionInterface): - """Quantizer for SmoothQuant.""" - - def check_config(self) -> None: - """Check if the SmoothQuant quantization config is valid.""" - quant_config = cast(SmoothQuantConfig, self.quant_config) - smoothquant_args = quant_config.smoothquant_args - super().check_config() - if 0 > smoothquant_args.migration_strength > 1: - raise NotSupportedQuantConfigError( - invalid_option=str(smoothquant_args.migration_strength), - valid_options=["between 0 and 1."], - ) - - def get_calib_dataset(self) -> datasets.Dataset: - """Get calibration dataset for SmoothQuant.""" - data_cfg = self.quant_config.calibration_dataset - tokenizer = get_tokenizer(self.converter.config.name_or_path) - dataset = safe_load_datasets(data_cfg) - - def preprocess(example) -> Dict[str, torch.Tensor]: - truncate_length = data_cfg.max_length * 4 - while True: - input_ids = tokenizer( - example[data_cfg.lookup_column_name][:truncate_length], - return_tensors="pt", - max_length=data_cfg.max_length * 2, - truncation=True, - padding=False, - ).input_ids - - if input_ids.size( - 1 - ) >= data_cfg.max_length * 2 or truncate_length >= len( - example[data_cfg.lookup_column_name] - ): - input_ids = input_ids[:, : data_cfg.max_length] - break - - truncate_length *= 2 - return {"input_ids": input_ids} - - dataset = ( - dataset.shuffle(self.quant_config.seed) - .select(range(data_cfg.num_samples)) - .select_columns([data_cfg.lookup_column_name]) - .map(function=preprocess) - ) - - return dataset - - @torch.no_grad() - def _perform_smoothing( - self, - activation_norms: List[torch.Tensor], - fc_weights: List[torch.Tensor], - activation_max: torch.Tensor, - *, - migration_strength: float = 0.5, - epsilon: float = 1e-5, - inplace: bool = False, - ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: - """Perform activation-weight smoothing in SmoothQuant. - - Performs the activation-weight smoothing scheme described in SmoothQuant - (Xiao et al., 2023), which migrates the amplitude of outliers from activations - to weights of matmul layers. The function takes in the following parameters: - - Args: - activation_norms: torch.Tensors representing affine parameters - (i.e., beta and gamma) of a normalization layer before each matmul layer. - fc_weights: torch.Tensors representing the weight matrices of the matmul layer. - activation_max: The maximum activation value of inputs of the matmul layer. - migration_strength: the strength of the activation migration. Default is 0.5. - epsilon: The epsilon used for numerical stability when calculating the scales. - Default is 1e-5. 
- - Returns: - A tuple of three torch.Tensors: (smoothed_activation_norms, smoothed_fc_weights) - - The function calculates "scales" as `pow(|Activation|, migration_strength) / - pow(|Weight|, 1-migration_strength)` and applies the smoothing effect into - a normalization layer that exists before every matmul layer. This is done because - it is more efficient than introducing a new smoothing layer before every matmul layer. - Fusing the smoothing effect into the normalization layer results in a faster and - more efficient implementation of the smoothing scheme. - - The function returns the smoothed normalization coefficients and the smoothed weight - matrices after the smoothing process. - """ - # shape of activation norms: [InChannels] - # shape of fc weights: [OutChannels, InChannels] - # shape of activation_max: [InChannels] - - # pylint: disable=too-many-locals - assert activation_norms - assert fc_weights - - assert activation_norms[0].ndim == 1 - in_channels = activation_norms[0].size(0) - device = activation_norms[0].device - dtype = activation_norms[0].dtype - - for norm in activation_norms: - assert tuple(norm.size()) == (in_channels,) - assert norm.device == device - assert norm.dtype == dtype - - for weight in fc_weights: - assert weight.ndim == 2 - assert weight.size(1) == in_channels - assert weight.device == device - assert weight.dtype == dtype - - activation_max = activation_max.to(device=device) - weight_max = fc_weights[0].abs().max(dim=0).values - for weight in fc_weights[1:]: - weight_max = torch.maximum(weight_max, weight.abs().max(dim=0).values) - - assert tuple(activation_max.size()) == (in_channels,) - assert tuple(weight_max.size()) == (in_channels,) - alpha = migration_strength - scales = ( - ( - activation_max.to(dtype=torch.float32).pow(alpha) - / weight_max.to(dtype=torch.float32).pow(1 - alpha) - ) - .clamp(min=epsilon) - .to(dtype=dtype) - ) - - scaled_activation_norms = [act_norm / scales for act_norm in activation_norms] - scaled_weights = [w * scales.view(1, -1) for w in fc_weights] - - if inplace: - for dst, src in zip(activation_norms, scaled_activation_norms): - dst.copy_(src) - for dst, src in zip(fc_weights, scaled_weights): - dst.copy_(src) - - return scaled_activation_norms, scaled_weights - - def _smooth( - self, - model: torch.nn.Module, - ) -> None: - """Smooths the models before Quantization.""" - model.to(device=torch.device(self.quant_config.device)) - model.eval() - model = cast(SmoothQuantHook, self.hook).pre_smooth(model) - - # collect stats for SmoothQuant scale. 
- dataset = self.get_calib_dataset() - quant_config = cast(SmoothQuantConfig, self.quant_config) - max_input_stats, _ = collect_stats( - model, - quant_config.device, - dataset, - cast(SmoothQuantHook, self.hook).get_linear_layer_types(), - tqdm_desc="Collecting stats for Smoothing.", - percentile=100.0, - ) - - # TODO change name to pre_act_params, post_act_params - # (attn_fc, ff2 are not scaled with norms) - for norms, weights, name in cast( - SmoothQuantHook, self.hook - ).iter_smooth_norm_weights(model): - self._perform_smoothing( - norms, - weights, - max_input_stats[name], - migration_strength=quant_config.smoothquant_args.migration_strength, - inplace=True, - ) - - def pre_quantize( - self, - model: torch.nn.Module, - ) -> None: - """Pre-procedure that should be called before quantize() is called.""" - self._smooth(model) - - def quantize( - self, - model: torch.nn.Module, - ) -> torch.nn.Module: - """Quantize model with SmoothQuant.""" - dataset = self.get_calib_dataset() - max_input_stats, max_output_stats = collect_stats( - model, - self.quant_config.device, - dataset, - cast(SmoothQuantHook, self.hook).get_linear_layer_types(), - percentile=self.quant_config.percentile, - tqdm_desc="Collecting stats for Static Quantization.", - ) - for quant_input in self.hook.iter_tf_quant_inputs(model): - assert isinstance(quant_input, TFQuantInputs) - quant_result = cast(SmoothQuantHook, self.hook).get_quant_result( - quant_input, - max_input_stats=max_input_stats, - max_output_stats=max_output_stats, - ) - - for field in fields(quant_result): - layer_quant_result = getattr(quant_result, field.name) - if isinstance(layer_quant_result, WeightActQuantResult): - layer = model.get_submodule(layer_quant_result.module_name) - q_layer = WeightActQuantizedLinearLayer.from_layer( - layer, layer_quant_result - ) - quant_result.block.add_module(field.name, q_layer) - - return model diff --git a/friendli/modules/quantizer/smoothquant/models/bloom.py b/friendli/modules/quantizer/smoothquant/models/bloom.py deleted file mode 100644 index 86fc39de..00000000 --- a/friendli/modules/quantizer/smoothquant/models/bloom.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
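The smoothing step above folds a per-channel scale into the normalization layer that precedes each matmul and into the matmul weight itself. Below is a minimal, self-contained sketch of that arithmetic; shapes and values are illustrative and not taken from the removed code (the real `_perform_smoothing` also rescales the norm's bias and handles several weights per norm):

```python
import torch

# Toy shapes; the real code smooths LayerNorm affine params and the weights of
# the matmul that consumes them, per transformer block.
in_ch, out_ch, alpha, eps = 8, 16, 0.5, 1e-5

gamma = torch.randn(in_ch)               # norm weight feeding the matmul
weight = torch.randn(out_ch, in_ch)      # matmul weight, [OutDim, InDim]
act_max = torch.rand(in_ch) * 10 + 0.1   # calibrated per-channel |activation| max

# SmoothQuant migration: s_j = max|X_j|^alpha / max|W_:,j|^(1-alpha)
w_max = weight.abs().max(dim=0).values
scales = (act_max.pow(alpha) / w_max.pow(1 - alpha)).clamp(min=eps)

gamma_smoothed = gamma / scales                 # divide the norm's affine weight
weight_smoothed = weight * scales.view(1, -1)   # scale the matmul's input channels

# The transformation is numerically equivalent for any input that passes
# through the (smoothed) norm before the (smoothed) matmul.
x = torch.randn(in_ch)
y_ref = (gamma * x) @ weight.T
y_smooth = (gamma_smoothed * x) @ weight_smoothed.T
assert torch.allclose(y_ref, y_smooth, atol=1e-4)
```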
- -"""Friendli BloomForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Any, Dict, Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.bloom import ( # type: ignore[import] - BloomConfig, - BloomForCausalLM, -) - -from friendli.modules.converter.base import OneOfConverter -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantBloomHook(SmoothQuantHook): - """SmoothQuant Hook for BloomForCausalLM.""" - - def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter): - """Initialize SmoothQuantBloomHook.""" - super().__init__(quant_config, converter) - self.num_heads = cast(BloomConfig, converter.config).num_attention_heads - self.hidden_size = cast(BloomConfig, converter.config).hidden_size - self.head_size = self.hidden_size // self.num_heads - - def iter_smooth_norm_weights( - self, - model: BloomForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight pr transformer block in BloomForCausalLM.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.input_layernorm.weight.data, - decoder_layer.input_layernorm.bias.data, - ], - [ - decoder_layer.self_attention.query_key_value.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical. 
- ) - # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed - yield ( - [ - decoder_layer.post_attention_layernorm.weight.data, - decoder_layer.post_attention_layernorm.bias.data, - ], - [decoder_layer.mlp.dense_h_to_4h.weight.data], # [OutDim, InDim] - f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.self_attention.dense.weight.data], - f"{self.quantized_layer_prefix}{index}.self_attention.dense", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.mlp.dense_4h_to_h.weight.data], - f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h", - ) - - def reshape_qkv_weight( - self, attn_layer: torch.nn.Module - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Reshapes the qkv weight in BloomForCausalLM for Quantization.""" - qkv_layer = cast(torch.nn.Linear, attn_layer.query_key_value) - split_qkv_weight_list = torch.split(qkv_layer.weight, self.head_size, dim=0) - num_heads = cast(BloomConfig, self.converter.config).num_attention_heads - - [q_weight, k_weight, v_weight] = [ - torch.cat( - [split_qkv_weight_list[j * 3 + i] for j in range(num_heads)], - dim=0, - ).reshape(-1, self.hidden_size) - for i in range(3) - ] - return q_weight, k_weight, v_weight - - def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor: - """Sort max_output_stas for seperating qkv_layer's output_stats.""" - split_qkv_output_stat = torch.split(max_output_stat, self.head_size) - qkv_output_stat_list = [ - torch.cat( - [split_qkv_output_stat[j * 3 + i] for j in range(self.num_heads)], - ) - for i in range(3) - ] - qkv_output_stat = torch.cat(qkv_output_stat_list) - return qkv_output_stat - - def iter_tf_quant_inputs(self, model: BloomForCausalLM) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of BloomForCausalLM.""" - for index, decoder_layer in enumerate(model.transformer.h): - self_attn = decoder_layer.self_attention - q_weight, k_weight, v_weight = self.reshape_qkv_weight(self_attn) - qkv_weight = torch.cat([q_weight, k_weight, v_weight], dim=0) - qkv_weight_out_dim = qkv_weight.size(0) - fc1 = decoder_layer.mlp.dense_h_to_4h - fc2 = decoder_layer.mlp.dense_4h_to_h - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", - 0, - qkv_weight_out_dim // 3, - self.sort_qkv_output_stats, - ), - k=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", - qkv_weight_out_dim // 3, - qkv_weight_out_dim // 3 * 2, - self.sort_qkv_output_stats, - ), - v=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", - qkv_weight_out_dim // 3 * 2, - qkv_weight_out_dim, - self.sort_qkv_output_stats, - ), - attn_fc=QuantInput( - self_attn.dense.weight, - f"{self.quantized_layer_prefix}{index}.self_attention.dense", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in BloomForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> 
torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.self_attention.dense - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.dense_4h_to_h - - def get_tf_blocks(self, model: BloomForCausalLM) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.transformer.h) diff --git a/friendli/modules/quantizer/smoothquant/models/codegen.py b/friendli/modules/quantizer/smoothquant/models/codegen.py deleted file mode 100644 index 00455186..00000000 --- a/friendli/modules/quantizer/smoothquant/models/codegen.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli CodeGenForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -import copy -from typing import Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.codegen import CodeGenForCausalLM # type: ignore[import] - -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantCodeGenHook(SmoothQuantHook): - """SmoothQuant Hook for CodeGenForCausalLM.""" - - def pre_smooth(self, model: torch.nn.Module) -> torch.nn.Module: - """Pre-procedure for SmoothQuant in CodeGenForCausalLM that should be called before smooth() is called.""" - super().pre_smooth(model) - for decoder_layer in cast(CodeGenForCausalLM, model).transformer.h: - decoder_layer.add_module("ln_2", copy.deepcopy(decoder_layer.ln_1)) - return model - - def iter_smooth_norm_weights( - self, - model: CodeGenForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in CodeGenForCausalLM.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - - for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection, MLP FF 1 ] gets smoothed - yield ( - [ - decoder_layer.ln_1.weight.data, - decoder_layer.ln_1.bias.data, - ], - [ - decoder_layer.attn.qkv_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.attn.qkv_proj", - ) - yield ( - [ - decoder_layer.ln_2.weight.data, - decoder_layer.ln_2.bias.data, - ], - [ - decoder_layer.mlp.fc_in.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.mlp.fc_in", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.attn.out_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.mlp.fc_out.weight.data], - f"{self.quantized_layer_prefix}{index}.mlp.fc_out", - ) - - def reshape_qkv_weight( - self, attn_layer: torch.nn.Module - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Reshapes the qkv weight in CodeGenForCausalLM for Quantization.""" - qkv_layer = cast(torch.nn.Linear, attn_layer.qkv_proj) - original_qkv_weight = qkv_layer.weight - reshaped_qkv_weight = 
original_qkv_weight.reshape( - (4, original_qkv_weight.size(0) // 4, original_qkv_weight.size(1)) - ) - q_weight, v_weight, k_weight = torch.split( - reshaped_qkv_weight, reshaped_qkv_weight.size(1) // 3, dim=1 - ) - q_weight = q_weight.reshape((-1, q_weight.size(2))) - k_weight = k_weight.reshape((-1, k_weight.size(2))) - v_weight = v_weight.reshape((-1, v_weight.size(2))) - - return q_weight, k_weight, v_weight - - def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor: - """Sorts the max output stats of qkv_proj in CodeGenForCausalLM.""" - reshpaed_max_output_stat = max_output_stat.reshape( - (4, max_output_stat.size(0) // 4) - ) - q_max_output_stat, v_max_output_stat, k_max_output_stat = torch.split( - reshpaed_max_output_stat, reshpaed_max_output_stat.size(1) // 3, dim=1 - ) - q_max_output_stat = q_max_output_stat.reshape((-1,)) - k_max_output_stat = k_max_output_stat.reshape((-1,)) - v_max_output_stat = v_max_output_stat.reshape((-1,)) - return torch.cat( - (q_max_output_stat, k_max_output_stat, v_max_output_stat), dim=0 - ) - - def iter_tf_quant_inputs( - self, model: CodeGenForCausalLM - ) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of CodeGenForCausalLM.""" - for index, decoder_layer in enumerate(model.transformer.h): - self_attn = decoder_layer.attn - q_weight, k_weight, v_weight = self.reshape_qkv_weight(self_attn) - qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0) - attn_weight_outdim = qkv_weight.size(0) # OutDim - fc1 = decoder_layer.mlp.fc_in - fc2 = decoder_layer.mlp.fc_out - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attn.qkv_proj", - 0, - attn_weight_outdim // 3, - self.sort_qkv_output_stats, - ), - k=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attn.qkv_proj", - attn_weight_outdim // 3, - attn_weight_outdim // 3 * 2, - self.sort_qkv_output_stats, - ), - v=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attn.qkv_proj", - attn_weight_outdim // 3 * 2, - attn_weight_outdim, - self.sort_qkv_output_stats, - ), - attn_fc=QuantInput( - self_attn.out_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.mlp.fc_in", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.mlp.fc_out", - None, - None, - ), - ) - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Returns the list of conversion informations for modified layers in CodeGenForCausalLM.""" - convert_info_list = super().modified_layers_convert_info_list - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ln_2.weight"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.converter.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_2.bias"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.converter.ln_bias_reshape, - ), - ] - ) - - return convert_info_list - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in CodeGenForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.attn.out_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.fc_out - - def get_tf_blocks(self, model: CodeGenForCausalLM) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.transformer.h) diff --git a/friendli/modules/quantizer/smoothquant/models/falcon.py b/friendli/modules/quantizer/smoothquant/models/falcon.py deleted file mode 100644 index 7722f9ba..00000000 --- a/friendli/modules/quantizer/smoothquant/models/falcon.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli FalconForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Any, Dict, Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.falcon import ( # type: ignore[import] - FalconConfig, - FalconForCausalLM, -) - -from friendli.modules.converter.base import OneOfConverter -from friendli.modules.converter.utils import convert_to_gpt_j_params -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantFalconHook(SmoothQuantHook): - """SmoothQuant Hook for FalconForCausalLM.""" - - def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter): - """Initialize SmoothQuantFalconHook.""" - super().__init__(quant_config, converter) - config = cast(FalconConfig, converter.config) - self.num_attention_heads = config.num_attention_heads - self.hidden_size = config.hidden_size - self.head_size = self.hidden_size // self.num_attention_heads - self.rotary_dim = self.head_size - self.num_kv_attention_heads = self.get_num_kv_attention_heads(config) - - def get_num_kv_attention_heads(self, config: FalconConfig) -> int: - """Returns the number of key-value attention heads in FalconForCausalLM.""" - if config.new_decoder_architecture: - if config.num_kv_heads is not None: - return config.num_kv_heads - return config.num_attention_heads - - if config.multi_query: - return 1 - - if config.num_kv_heads is not None: - return config.num_kv_heads - return config.num_attention_heads - - def iter_smooth_norm_weights( - self, - model: FalconForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in FalconForCausalLM.""" - quant_args = 
cast(SmoothQuantConfig, self.quant_config).smoothquant_args - for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr] - if cast(FalconConfig, self.converter.config).new_decoder_architecture: - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.ln_attn.weight.data, - decoder_layer.ln_attn.bias.data, - ], - [ - decoder_layer.self_attention.query_key_value.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical. - ) - # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed - yield ( - [ - decoder_layer.ln_mlp.weight.data, - decoder_layer.ln_mlp.bias.data, - ], - [decoder_layer.mlp.dense_h_to_4h.weight.data], # [OutDim, InDim] - f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h", - ) - else: - # [LayerNorm 1] - [ QKV projection ] gets smoothed ( MLP FF1 is not smoothed. No LayerNorm 2. ) - yield ( - [ - decoder_layer.input_layernorm.weight.data, - ], - [ - decoder_layer.self_attention.query_key_value.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical. - ) - - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.self_attention.dense.weight.data], - f"{self.quantized_layer_prefix}{index}.self_attention.dense", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.mlp.dense_4h_to_h.weight.data], - f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h", - ) - - def reshape_qkv_weight( - self, attn_layer: torch.nn.Module - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Reshapes the qkv weight in FalconForCausalLM for Quantization.""" - qkv_weight = cast(torch.nn.Linear, attn_layer.query_key_value).weight - num_queries_per_kv = self.num_attention_heads // self.num_kv_attention_heads - - qkv_weight = qkv_weight.reshape( - self.num_kv_attention_heads, - num_queries_per_kv + 2, - self.head_size, - self.hidden_size, - ) - - q_weight = qkv_weight[:, :num_queries_per_kv].reshape( - self.num_kv_attention_heads * num_queries_per_kv, - self.head_size, - self.hidden_size, - ) - k_weight = qkv_weight[:, [-2]].reshape( - self.num_kv_attention_heads, - self.head_size, - self.hidden_size, - ) - v_weight = qkv_weight[:, [-1]].reshape( - self.num_kv_attention_heads * self.head_size, - self.hidden_size, - ) - - q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim) - k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim) - - q_weight = q_weight.reshape( - self.num_kv_attention_heads * num_queries_per_kv * self.head_size, - self.hidden_size, - ) - k_weight = k_weight.reshape( - self.num_kv_attention_heads * self.head_size, - self.hidden_size, - ) - - return q_weight, k_weight, v_weight - - def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor: - """Sort max output stats of qkv_layer in FalconForCausalLM.""" - num_queries_per_kv = self.num_attention_heads // self.num_kv_attention_heads - qkv_output_stat = max_output_stat.reshape( - self.num_kv_attention_heads, - num_queries_per_kv + 2, - self.head_size, - ) - q_out_stats = qkv_output_stat[:, :num_queries_per_kv].reshape( - self.num_kv_attention_heads * num_queries_per_kv, - self.head_size, - ) - k_out_stats = qkv_output_stat[:, [-2]].reshape( - self.num_kv_attention_heads, - self.head_size, - ) - v_out_stats = qkv_output_stat[:, [-1]].reshape( - 
self.num_kv_attention_heads * self.head_size, - ) - q_out_stats = convert_to_gpt_j_params(q_out_stats, self.rotary_dim) - k_out_stats = convert_to_gpt_j_params(k_out_stats, self.rotary_dim) - q_out_stats = q_out_stats.reshape( - self.num_kv_attention_heads * num_queries_per_kv * self.head_size, - ) - k_out_stats = k_out_stats.reshape( - self.num_kv_attention_heads * self.head_size, - ) - - return torch.cat((q_out_stats, k_out_stats, v_out_stats), dim=0) - - def iter_tf_quant_inputs(self, model: FalconForCausalLM) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of FalconForCausalLM.""" - for index, decoder_layer in enumerate(model.transformer.h): - self_attn = decoder_layer.self_attention - q_weight, k_weight, v_weight = self.reshape_qkv_weight(self_attn) - qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0) - fc1 = decoder_layer.mlp.dense_h_to_4h - fc2 = decoder_layer.mlp.dense_4h_to_h - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", - 0, - q_weight.size(0), - self.sort_qkv_output_stats, - ), - k=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", - q_weight.size(0), - q_weight.size(0) + k_weight.size(0), - self.sort_qkv_output_stats, - ), - v=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", - q_weight.size(0) + k_weight.size(0), - qkv_weight.size(0), - self.sort_qkv_output_stats, - ), - attn_fc=QuantInput( - self_attn.dense.weight, - f"{self.quantized_layer_prefix}{index}.self_attention.dense", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in FalconForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.self_attention.dense - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.dense_4h_to_h - - def get_tf_blocks(self, model: FalconForCausalLM) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.transformer.h) diff --git a/friendli/modules/quantizer/smoothquant/models/gpt2.py b/friendli/modules/quantizer/smoothquant/models/gpt2.py deleted file mode 100644 index 50a20695..00000000 --- a/friendli/modules/quantizer/smoothquant/models/gpt2.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
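The Falcon hook above has to undo the grouped-query fusion before per-matrix calibration: the fused `query_key_value` weight stores, for every KV head, its query heads followed by one K head and one V head. The sketch below shows that regrouping under toy dimensions; the real hook additionally applies `convert_to_gpt_j_params` to reorder the rotary dimensions, which is omitted here:

```python
import torch

# Toy GQA dimensions: 4 query heads sharing 2 KV heads.
num_kv_heads, queries_per_kv, head_size, hidden = 2, 2, 4, 16
num_heads = num_kv_heads * queries_per_kv

# Fused QKV weight rows: for each KV group, `queries_per_kv` query heads,
# then one K head, then one V head.
qkv = torch.randn((num_heads + 2 * num_kv_heads) * head_size, hidden)

grouped = qkv.reshape(num_kv_heads, queries_per_kv + 2, head_size, hidden)
q = grouped[:, :queries_per_kv].reshape(num_heads * head_size, hidden)
k = grouped[:, [-2]].reshape(num_kv_heads * head_size, hidden)
v = grouped[:, [-1]].reshape(num_kv_heads * head_size, hidden)

# Re-concatenated in Q, K, V order so start/end offsets in QuantInput can
# address each projection as a contiguous slice.
qkv_regrouped = torch.cat((q, k, v), dim=0)
print(q.shape, k.shape, v.shape, qkv_regrouped.shape)
```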
- -"""Friendli GPT2LMHeadModel QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.gpt2 import GPT2LMHeadModel # type: ignore[import] -from transformers.pytorch_utils import Conv1D # type: ignore[import] - -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantGPT2Hook(SmoothQuantHook): - """SmoothQuant Hook for GPT2LMHeadModel.""" - - def iter_smooth_norm_weights( - self, model: GPT2LMHeadModel - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in GPT2LMHeadModel.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.ln_1.weight.data, - decoder_layer.ln_1.bias.data, - ], - [ - decoder_layer.attn.c_attn.weight.data.transpose( - 0, 1 - ), # [OutDim, InDim] - ], - f"{self.quantized_layer_prefix}{index}.attn.c_attn", # the input tensors fed into Q, K, V matrices are identical. - ) - # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed - yield ( - [ - decoder_layer.ln_2.weight.data, - decoder_layer.ln_2.bias.data, - ], - [decoder_layer.mlp.c_fc.weight.data.transpose(0, 1)], # [OutDim, InDim] - f"{self.quantized_layer_prefix}{index}.mlp.c_fc", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data.transpose(0, 1)], - [decoder_layer.attn.c_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.attn.c_proj", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data.transpose(0, 1)], - [decoder_layer.mlp.c_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.mlp.c_proj", - ) - - def iter_tf_quant_inputs(self, model: GPT2LMHeadModel) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of GPT2LMHeadModel.""" - for index, decoder_layer in enumerate(model.transformer.h): - attn = decoder_layer.attn - attn_weight_outdim = attn.c_attn.nf # OutDim - fc1 = decoder_layer.mlp.c_fc - fc2 = decoder_layer.mlp.c_proj - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - attn.c_attn.weight.transpose(0, 1), - f"{self.quantized_layer_prefix}{index}.attn.c_attn", - 0, - attn_weight_outdim // 3, - ), - k=QuantInput( - attn.c_attn.weight.transpose(0, 1), - f"{self.quantized_layer_prefix}{index}.attn.c_attn", - attn_weight_outdim // 3, - attn_weight_outdim // 3 * 2, - ), - v=QuantInput( - attn.c_attn.weight.transpose(0, 1), - f"{self.quantized_layer_prefix}{index}.attn.c_attn", - attn_weight_outdim // 3 * 2, - attn_weight_outdim, - ), - attn_fc=QuantInput( - attn.c_proj.weight.transpose(0, 1), - f"{self.quantized_layer_prefix}{index}.attn.c_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight.transpose(0, 1), - f"{self.quantized_layer_prefix}{index}.mlp.c_fc", - None, - None, - ), - ff2=QuantInput( - fc2.weight.transpose(0, 1), - f"{self.quantized_layer_prefix}{index}.mlp.c_proj", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in 
GPT2LMHeadModel.""" - return (Conv1D,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.attn.c_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.c_proj - - def get_tf_blocks(self, model: GPT2LMHeadModel) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.transformer.h) diff --git a/friendli/modules/quantizer/smoothquant/models/gpt_neox.py b/friendli/modules/quantizer/smoothquant/models/gpt_neox.py deleted file mode 100644 index d2df5090..00000000 --- a/friendli/modules/quantizer/smoothquant/models/gpt_neox.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli GPTNeoXForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Any, Dict, Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.gpt_neox import ( # type: ignore[import] - GPTNeoXConfig, - GPTNeoXForCausalLM, -) - -from friendli.modules.converter.base import OneOfConverter -from friendli.modules.converter.utils import convert_to_gpt_j_params -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantGPTNeoXHook(SmoothQuantHook): - """SmoothQuant Hook for GPTNeoXForCausalLM.""" - - def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter): - """Initialize SmoothQuantGPTNeoXHook.""" - super().__init__(quant_config, converter) - config = cast(GPTNeoXConfig, converter.config) - self.num_attention_heads = config.num_attention_heads - self.num_kv_attention_heads = config.num_attention_heads - self.hidden_size = config.hidden_size - self.head_size = self.hidden_size // self.num_attention_heads - self.rotary_dim = int(self.head_size * config.rotary_pct) - - def iter_smooth_norm_weights( - self, - model: GPTNeoXForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in GPTNeoXForCausalLM.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - for index, decoder_layer in enumerate(model.gpt_neox.layers): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.input_layernorm.weight.data, - decoder_layer.input_layernorm.bias.data, - ], - [ - decoder_layer.attention.query_key_value.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical. 
- ) - # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed - yield ( - [ - decoder_layer.post_attention_layernorm.weight.data, - decoder_layer.post_attention_layernorm.bias.data, - ], - [decoder_layer.mlp.dense_h_to_4h.weight.data], # [OutDim, InDim] - f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.attention.dense.weight.data], - f"{self.quantized_layer_prefix}{index}.attention.dense", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.mlp.dense_4h_to_h.weight.data], - f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h", - ) - - def reshape_qkv_weight( - self, attn_layer: torch.nn.Module - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """Reshape GPTNeoXForCausalLM's qkv weight for int8 quantization.""" - qkv_weight = cast(torch.nn.Linear, attn_layer).weight - qkv_weight = qkv_weight.reshape( - self.num_attention_heads, - 3, - self.head_size, - self.hidden_size, - ) - - q_weight = qkv_weight[:, 0].reshape( - self.num_attention_heads, - self.head_size, - self.hidden_size, - ) - k_weight = qkv_weight[:, 1].reshape( - self.num_attention_heads, - self.head_size, - self.hidden_size, - ) - v_weight = qkv_weight[:, 2].reshape( - self.num_attention_heads * self.head_size, - self.hidden_size, - ) - - q_weight = convert_to_gpt_j_params(param=q_weight, rotary_dim=self.rotary_dim) - k_weight = convert_to_gpt_j_params(param=k_weight, rotary_dim=self.rotary_dim) - q_weight = q_weight.reshape( - self.num_attention_heads * self.head_size, - self.hidden_size, - ) - k_weight = k_weight.reshape( - self.num_attention_heads * self.head_size, - self.hidden_size, - ) - return q_weight, k_weight, v_weight - - def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor: - """Sort max output stats of qkv_layer in GPTNeoXForCausalLM.""" - max_output_stat = max_output_stat.reshape( - self.num_attention_heads, - 3, - self.head_size, - ) - q_output_stat = max_output_stat[:, 0].reshape( - self.num_attention_heads, - self.head_size, - ) - k_output_stat = max_output_stat[:, 1].reshape( - self.num_attention_heads, - self.head_size, - ) - v_output_stat = max_output_stat[:, 2].reshape( - self.num_attention_heads * self.head_size, - ) - q_output_stat = convert_to_gpt_j_params(q_output_stat, self.rotary_dim) - k_output_stat = convert_to_gpt_j_params(k_output_stat, self.rotary_dim) - q_output_stat = q_output_stat.reshape( - self.num_attention_heads * self.head_size, - ) - k_output_stat = k_output_stat.reshape( - self.num_attention_heads * self.head_size, - ) - return torch.cat((q_output_stat, k_output_stat, v_output_stat), dim=0) - - def iter_tf_quant_inputs( - self, model: GPTNeoXForCausalLM - ) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of GPTNeoXForCausalLM.""" - for index, decoder_layer in enumerate(model.gpt_neox.layers): - attention = decoder_layer.attention - attention_weight_outdim = attention.query_key_value.weight.size(0) # OutDim - q_weight, k_weight, v_weight = self.reshape_qkv_weight( - attention.query_key_value - ) - qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0) - fc1 = decoder_layer.mlp.dense_h_to_4h - fc2 = decoder_layer.mlp.dense_4h_to_h - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attention.query_key_value", - 0, - 
attention_weight_outdim // 3, - self.sort_qkv_output_stats, - ), - k=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attention.query_key_value", - attention_weight_outdim // 3, - attention_weight_outdim // 3 * 2, - self.sort_qkv_output_stats, - ), - v=QuantInput( - qkv_weight, - f"{self.quantized_layer_prefix}{index}.attention.query_key_value", - attention_weight_outdim // 3 * 2, - attention_weight_outdim, - self.sort_qkv_output_stats, - ), - attn_fc=QuantInput( - attention.dense.weight, - f"{self.quantized_layer_prefix}{index}.attention.dense", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in GPTNeoXForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.attention.dense - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.dense_4h_to_h - - def get_tf_blocks(self, model: GPTNeoXForCausalLM) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.gpt_neox.layers) diff --git a/friendli/modules/quantizer/smoothquant/models/gptj.py b/friendli/modules/quantizer/smoothquant/models/gptj.py deleted file mode 100644 index 77e15732..00000000 --- a/friendli/modules/quantizer/smoothquant/models/gptj.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
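Both the Bloom and GPT-NeoX hooks deal with a head-interleaved fused QKV layout, so the calibrated per-channel output statistics must be permuted the same way as the weight rows. A toy sketch of that regrouping follows; the real `sort_qkv_output_stats` also applies the rotary reordering via `convert_to_gpt_j_params`, which is skipped here:

```python
import torch

# NeoX/Bloom-style fused layout: each head stores its Q, K, V rows contiguously.
num_heads, head_size = 4, 3
out_dim = num_heads * 3 * head_size
max_output_stat = torch.arange(out_dim, dtype=torch.float32)

# Regroup to all-Q, all-K, all-V so the stats line up with
# qkv_weight = cat([q_weight, k_weight, v_weight], dim=0).
per_head = max_output_stat.reshape(num_heads, 3, head_size)
sorted_stat = torch.cat(
    [per_head[:, i].reshape(num_heads * head_size) for i in range(3)]
)
print(sorted_stat.reshape(3, num_heads, head_size))
```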
- -"""Friendli GPTJForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -import copy -from typing import Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.gptj import GPTJForCausalLM # type: ignore[import] - -from friendli.modules.converter.base import DECODER_PREFIX -from friendli.modules.converter.schema import ConvertInfo -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantGPTJHook(SmoothQuantHook): - """SmoothQuant Hook for GPTJForCausalLM.""" - - def pre_smooth(self, model: torch.nn.Module) -> torch.nn.Module: - """Pre-procedure for SmoothQuant in GPTJForCausalLM that should be called before smooth() is called.""" - super().pre_smooth(model) - for decoder_layer in cast(GPTJForCausalLM, model).transformer.h: - decoder_layer.add_module("ln_2", copy.deepcopy(decoder_layer.ln_1)) - return model - - def iter_smooth_norm_weights( - self, - model: GPTJForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in GPTJForCausalLM.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection] gets smoothed - yield ( - [ - decoder_layer.ln_1.weight.data, - decoder_layer.ln_1.bias.data, - ], - [ - decoder_layer.attn.q_proj.weight.data, # [OutDim, InDim] - decoder_layer.attn.k_proj.weight.data, # [OutDim, InDim] - decoder_layer.attn.v_proj.weight.data, # [OutDim, InDim] - ], - f"{self.quantized_layer_prefix}{index}.attn.q_proj", # the input tensors fed into Q, K, V matrices are identical. 
- ) - # [LayerNorm 1] - [ MLP FF1 ] gets smoothed - yield ( - [ - decoder_layer.ln_2.weight.data, - decoder_layer.ln_2.bias.data, - ], - [ - decoder_layer.mlp.fc_in.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.mlp.fc_in", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.attn.out_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.mlp.fc_out.weight.data], - f"{self.quantized_layer_prefix}{index}.mlp.fc_out", - ) - - def iter_tf_quant_inputs(self, model: GPTJForCausalLM) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of GPTJForCausalLM.""" - for index, decoder_layer in enumerate(model.transformer.h): - attn = decoder_layer.attn - fc1 = decoder_layer.mlp.fc_in - fc2 = decoder_layer.mlp.fc_out - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - attn.q_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.q_proj", - None, - None, - ), - k=QuantInput( - attn.k_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.k_proj", - None, - None, - ), - v=QuantInput( - attn.v_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.v_proj", - None, - None, - ), - attn_fc=QuantInput( - attn.out_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.mlp.fc_in", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.mlp.fc_out", - None, - None, - ), - ) - - @property - def modified_layers_convert_info_list( - self, - ) -> List[ConvertInfo]: - """Returns the modified layers' convert dict in GPTJForCausalLM.""" - convert_info_list = super().modified_layers_convert_info_list - - for i in range(self.converter.decoder_layer_num): - layer_prefix = f"{self.quantized_layer_prefix}{i}." 
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - convert_info_list.extend( - [ - ConvertInfo( - param_names=[f"{layer_prefix}ln_2.weight"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}ln_2/gamma:0", - reshape_fn=self.converter.ln_weight_reshape, - ), - ConvertInfo( - param_names=[f"{layer_prefix}ln_2.bias"], - data_type=self.converter.data_type, - converted_name=f"{converted_prefix}ln_2/beta:0", - reshape_fn=self.converter.ln_bias_reshape, - ), - ] - ) - - return convert_info_list - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in GPTJForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.attn.out_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.fc_out - - def get_tf_blocks(self, model: GPTJForCausalLM) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.transformer.h) diff --git a/friendli/modules/quantizer/smoothquant/models/llama.py b/friendli/modules/quantizer/smoothquant/models/llama.py deleted file mode 100644 index 5256401a..00000000 --- a/friendli/modules/quantizer/smoothquant/models/llama.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli LlamaForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -import copy -from dataclasses import dataclass -from typing import Any, Dict, Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.llama import ( # type: ignore[import] - LlamaConfig, - LlamaForCausalLM, -) - -from friendli.modules.converter.base import DECODER_PREFIX, OneOfConverter -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ( - ModuleName, - QuantInput, - TFQuantInputs, - TFQuantResults, - WeightActQuantResult, -) -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook -from friendli.modules.quantizer.utils import get_weight_act_quant_scales - - -@dataclass -class LlamaTFQuantInput(TFQuantInputs): - """Dataclass for int8 quantization input per layer in LlamaForCausalLM.""" "" - - ff_gate: QuantInput - - -@dataclass -class LlamaTFQuantResults(TFQuantResults): - """Dataclass for int8 quantization result per a transformer block in LlamaForCausalLM.""" "" - - ff_gate: WeightActQuantResult - - -class SmoothQuantLlamaHook(SmoothQuantHook): - """SmoothQuant Hook for LlamaForCausalLM.""" - - def __init__(self, quant_config: SmoothQuantConfig, converter: OneOfConverter): - """Initialize SmoothQuantLlamaHook.""" - super().__init__(quant_config, converter) - config = cast(LlamaConfig, converter.config) - self.num_attention_heads = config.num_attention_heads - if config.num_key_value_heads is None: - self.num_kv_attention_heads = self.num_attention_heads - else: - self.num_kv_attention_heads = config.num_key_value_heads - self.hidden_size = config.hidden_size - self.head_size = self.hidden_size // self.num_attention_heads - self.rotary_dim = self.head_size - - def pre_smooth(self, model: torch.nn.Module) -> torch.nn.Module: - """Pre-procedure for SmoothQuant in LlamaForCausalLM that should be called before smooth() is called.""" - 
super().pre_smooth(model) - for decoder_layer in cast(LlamaForCausalLM, model).model.layers: - decoder_layer.add_module( - "post_attention_layernorm_2", - copy.deepcopy(decoder_layer.post_attention_layernorm), - ) - return model - - def iter_smooth_norm_weights( - self, - model: LlamaForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in LlamaForCausalLM.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - - for index, decoder_layer in enumerate(model.model.layers): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.input_layernorm.weight.data, - ], - [ - decoder_layer.self_attn.q_proj.weight.data, - decoder_layer.self_attn.k_proj.weight.data, - decoder_layer.self_attn.v_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", # the input tensors fed into Q, K, V matrices are identical. - ) - # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed - yield ( - [ - decoder_layer.post_attention_layernorm.weight.data, - ], - [ - decoder_layer.mlp.up_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.mlp.up_proj", - ) - # [LayerNomr 2] = [ MLP GATED FF ] gets smoothed - yield ( - [ - decoder_layer.post_attention_layernorm_2.weight.data, - ], - [ - decoder_layer.mlp.gate_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.mlp.gate_proj", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.self_attn.o_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - ) - - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.mlp.down_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.mlp.down_proj", - ) - - def iter_tf_quant_inputs(self, model: LlamaForCausalLM) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of LlamaForCausalLM.""" - for index, decoder_layer in enumerate(model.model.layers): - self_attn = decoder_layer.self_attn - fc1 = decoder_layer.mlp.up_proj - ff_gate = decoder_layer.mlp.gate_proj - fc2 = decoder_layer.mlp.down_proj - - yield LlamaTFQuantInput( - layer_index=index, - block=decoder_layer, - q=QuantInput( - self_attn.q_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", - None, - None, - ), - k=QuantInput( - self_attn.k_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.k_proj", - None, - None, - ), - v=QuantInput( - self_attn.v_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.v_proj", - None, - None, - ), - attn_fc=QuantInput( - self_attn.o_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.mlp.up_proj", - None, - None, - ), - ff_gate=QuantInput( - ff_gate.weight, - f"{self.quantized_layer_prefix}{index}.mlp.gate_proj", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.mlp.down_proj", - None, - None, - ), - ) - - def get_quant_result( - self, - quant_input: TFQuantInputs, - **kwargs: Any, - ) -> TFQuantResults: - """Returns the quantization result for a specific layer in LlamaForCausalLM.""" - max_input_stats: Dict[ModuleName, torch.Tensor] = kwargs["max_input_stats"] - max_output_stats: Dict[ModuleName, torch.Tensor] = 
kwargs["max_output_stats"] - - def get_scale(quant_input: QuantInput) -> WeightActQuantResult: - weight, name, start, end = ( - quant_input.weight, - quant_input.name, - quant_input.start_offset, - quant_input.end_offset, - ) - return get_weight_act_quant_scales( - name, - max_input_stats[name], - weight[start:end], - max_output_stats[name][start:end], - ) - - quant_input = cast(LlamaTFQuantInput, quant_input) - return LlamaTFQuantResults( - layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_input.layer_index}.", - q=get_scale(quant_input.q), - k=get_scale(quant_input.k), - v=get_scale(quant_input.v), - attn_fc=get_scale(quant_input.attn_fc), - ff1=get_scale(quant_input.ff1), - ff_gate=get_scale(quant_input.ff_gate), - ff2=get_scale(quant_input.ff2), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in LlamaForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.self_attn.o_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.down_proj - - def get_tf_blocks(self, model: LlamaForCausalLM) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.model.layers) - - @property - def quantized_param_names(self) -> List[str]: - """Returns the parameter names in LlamaForCausalLM.""" - param_names = super().quantized_param_names - for i in range(self.converter.decoder_layer_num): - converted_prefix = f"{DECODER_PREFIX}/h_._{i}/" - param_names.append(f"{converted_prefix}mlp/c_gate/weight:0") - return param_names diff --git a/friendli/modules/quantizer/smoothquant/models/mpt.py b/friendli/modules/quantizer/smoothquant/models/mpt.py deleted file mode 100644 index a72561fd..00000000 --- a/friendli/modules/quantizer/smoothquant/models/mpt.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
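`get_quant_result` above slices the fused weight with each `QuantInput`'s start/end offsets and asks `get_weight_act_quant_scales` for int8 scales derived from the calibrated input/output maxima. The helper's actual signature lives in `friendli/modules/quantizer/utils.py`; the sketch below only illustrates the general slicing-plus-scaling idea with made-up per-tensor scales:

```python
import torch

# Hypothetical per-tensor W8A8 scale computation; the repo's
# get_weight_act_quant_scales() may differ (zero points, per-channel scales),
# so treat this purely as an illustration.
def int8_scales(weight_slice, in_max, out_max):
    act_in_scale = in_max.abs().max() / 127.0     # activation (input) scale
    act_out_scale = out_max.abs().max() / 127.0   # activation (output) scale
    weight_scale = weight_slice.abs().max() / 127.0
    return act_in_scale, act_out_scale, weight_scale

# A fused QKV weight addressed through (start, end) offsets, as QuantInput does.
qkv_weight = torch.randn(3 * 32, 16)
max_input_stats = torch.rand(16) * 4           # calibrated per-channel input max
max_output_stats = torch.rand(3 * 32) * 4      # calibrated per-channel output max

start, end = 0, 32                             # the "q" slice
scales = int8_scales(
    qkv_weight[start:end], max_input_stats, max_output_stats[start:end]
)

q_int8 = torch.clamp((qkv_weight[start:end] / scales[2]).round(), -128, 127).to(torch.int8)
print([s.item() for s in scales], q_int8.dtype, q_int8.shape)
```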
- -"""Friendli MPTForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type, cast - -import torch - -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantMPTHook(SmoothQuantHook): - """SmoothQuant Hook for MPTForCausalLM.""" - - def iter_smooth_norm_weights( - self, - model: torch.nn.Module, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in MPTForCausalLM.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - - for index, decoder_layer in enumerate( - model.transformer.blocks # type: ignore[union-attr, arg-type] - ): - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [decoder_layer.norm_1.weight.data], - [decoder_layer.attn.Wqkv.weight.data], - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - ) - # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed - yield ( - [decoder_layer.norm_2.weight.data], - [decoder_layer.ffn.up_proj.weight.data], # [OutDim, InDim] - f"{self.quantized_layer_prefix}{index}.ffn.up_proj", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.attn.out_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.ffn.down_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.ffn.down_proj", - ) - - def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of MPTForCausalLM.""" - for index, decoder_layer in enumerate( - model.transformer.blocks # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.attn - q_outdim = ( - self.converter.decoder_num_attention_heads - * self.converter.decoder_head_size - ) - kv_outdim = ( - self.converter.decoder_num_kv_attention_heads - * self.converter.decoder_head_size - ) - qkv_outdim = self_attn.Wqkv.weight.size(0) - assert qkv_outdim == q_outdim + kv_outdim * 2 - fc1 = decoder_layer.ffn.up_proj - fc2 = decoder_layer.ffn.down_proj - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - self_attn.Wqkv.weight, - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - 0, - q_outdim, - ), - k=QuantInput( - self_attn.Wqkv.weight, - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - q_outdim, - q_outdim + kv_outdim, - ), - v=QuantInput( - self_attn.Wqkv.weight, - f"{self.quantized_layer_prefix}{index}.attn.Wqkv", - q_outdim + kv_outdim, - qkv_outdim, - ), - attn_fc=QuantInput( - self_attn.out_proj.weight, - f"{self.quantized_layer_prefix}{index}.attn.out_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.ffn.up_proj", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.ffn.down_proj", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in MPTForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention 
in the decoder layer.""" - return decoder_layer.attn.out_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.ffn.down_proj - - def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.transformer.blocks) diff --git a/friendli/modules/quantizer/smoothquant/models/opt.py b/friendli/modules/quantizer/smoothquant/models/opt.py deleted file mode 100644 index ed6d8292..00000000 --- a/friendli/modules/quantizer/smoothquant/models/opt.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli OPTForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type, cast - -import torch -from transformers.models.opt import OPTForCausalLM # type: ignore[import] - -from friendli.modules.quantizer.schema.config import SmoothQuantConfig -from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs -from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook - - -class SmoothQuantOPTHook(SmoothQuantHook): - """SmoothQuant Hook for OPTForCausalLM.""" - - def iter_smooth_norm_weights( - self, model: OPTForCausalLM - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of layernorm's weight and linear layer's weight per transformer block in OPTForCausalLM.""" - quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args - for index, decoder_layer in enumerate(model.model.decoder.layers): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.self_attn_layer_norm.weight.data, - decoder_layer.self_attn_layer_norm.bias.data, - ], - [ - decoder_layer.self_attn.q_proj.weight.data, - decoder_layer.self_attn.k_proj.weight.data, - decoder_layer.self_attn.v_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", # the input tensors fed into Q, K, V matrices are identical. 
- ) - # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed - yield ( - [ - decoder_layer.final_layer_norm.weight.data, - decoder_layer.final_layer_norm.bias.data, - ], - [decoder_layer.fc1.weight.data], - f"{self.quantized_layer_prefix}{index}.fc1", - ) - if quant_args.attn_fc_smoothing: - yield ( - [decoder_layer.attn_fc_pre_smoother.scale.data], - [decoder_layer.self_attn.out_proj.weight.data], - f"{self.quantized_layer_prefix}{index}.self_attn.out_proj", - ) - if quant_args.ff2_smoothing: - yield ( - [decoder_layer.ff2_pre_smoother.scale.data], - [decoder_layer.fc2.weight.data], - f"{self.quantized_layer_prefix}{index}.fc2", - ) - - def iter_tf_quant_inputs(self, model: OPTForCausalLM) -> Iterator[TFQuantInputs]: - """Returns the layers which should be quantized in transformer block of OPTForCausalLM.""" - for index, decoder_layer in enumerate(model.model.decoder.layers): - self_attn = decoder_layer.self_attn - fc1 = decoder_layer.fc1 - fc2 = decoder_layer.fc2 - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - q=QuantInput( - self_attn.q_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", - None, - None, - ), - k=QuantInput( - self_attn.k_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.k_proj", - None, - None, - ), - v=QuantInput( - self_attn.v_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.v_proj", - None, - None, - ), - attn_fc=QuantInput( - self_attn.out_proj.weight, - f"{self.quantized_layer_prefix}{index}.self_attn.out_proj", - None, - None, - ), - ff1=QuantInput( - fc1.weight, - f"{self.quantized_layer_prefix}{index}.fc1", - None, - None, - ), - ff2=QuantInput( - fc2.weight, - f"{self.quantized_layer_prefix}{index}.fc2", - None, - None, - ), - ) - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Returns the linear layer types in OPTForCausalLM.""" - return (torch.nn.Linear,) - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after attention in the decoder layer.""" - return decoder_layer.self_attn.out_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the linear layer after FF1 in the decoder layer.""" - return decoder_layer.fc2 - - def get_tf_blocks(self, model: OPTForCausalLM) -> List[torch.nn.Module]: - """Returns the decoder layers(transformer blocks) in the model.""" - return list(model.model.decoder.layers) diff --git a/friendli/modules/quantizer/utils.py b/friendli/modules/quantizer/utils.py deleted file mode 100644 index 1e47030b..00000000 --- a/friendli/modules/quantizer/utils.py +++ /dev/null @@ -1,514 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
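All of the model-specific hooks removed above share one contract: `iter_smooth_norm_weights` yields (norm affine tensors, following linear-layer weights, stat-lookup name) triples, and the base quantizer smooths each triple in place. A toy model is enough to show the shape of that contract; the class and module names below are made up for illustration and are not part of the deleted API:

```python
import torch

class ToyBlock(torch.nn.Module):
    """A stand-in transformer block: a norm feeding a linear layer."""

    def __init__(self, hidden: int = 8):
        super().__init__()
        self.ln = torch.nn.LayerNorm(hidden)
        self.fc = torch.nn.Linear(hidden, 4 * hidden)

def iter_smooth_norm_weights(blocks):
    """Yield (norm params, linear weights, stat key) per block, hook-style."""
    for i, block in enumerate(blocks):
        yield (
            [block.ln.weight.data, block.ln.bias.data],  # smoothed in place
            [block.fc.weight.data],                      # [OutDim, InDim]
            f"blocks.{i}.fc",                            # calibration-stat key
        )

blocks = [ToyBlock() for _ in range(2)]
for norms, weights, name in iter_smooth_norm_weights(blocks):
    print(name, [t.shape for t in norms], [w.shape for w in weights])
```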
- -"""Friendli Quantizer Utils.""" - -from __future__ import annotations - -import os -from contextlib import contextmanager -from itertools import islice -from typing import ( - Any, - Callable, - Dict, - Iterable, - Iterator, - List, - Protocol, - Sequence, - Tuple, - Type, - TypeVar, - Union, -) - -import datasets # type: ignore[import] -import torch -from accelerate import cpu_offload_with_hook # type: ignore -from torch.utils.data import DataLoader -from tqdm import tqdm - -from friendli.enums import ModelDataType -from friendli.errors import InvalidConfigError, QuantizationError -from friendli.logging import logger -from friendli.modules.quantizer.schema.config import CalibrationDatasetConfig -from friendli.modules.quantizer.schema.data import ( - ModuleName, - WeightActQuantResult, - WeightOnlyQuantResult, -) - - -def scale_reshape( - params: List[torch.Tensor], -) -> torch.Tensor: - """Reshape scale/zero of quantized layers.""" - if len(params) == 1: - t = params[0] - else: - t = torch.cat(params, dim=1) - return t - - -def quantized_qkv_weight_reshape( - params: List[torch.Tensor], -) -> torch.Tensor: - """Reshape weight of quantized qkv layers.""" - assert len(params) == 3 - qkv_weight = torch.concat( - params, - dim=0, - ) # [OutDim, InDim] - - return qkv_weight.to(torch.uint8) - - -def quantized_linear_weight_reshape( - params: List[torch.Tensor], -) -> torch.Tensor: - """Reshape weight of quantized linear layers.""" - assert len(params) == 1 - - return params[0].to(torch.uint8) - - -def safe_load_datasets(data_cfg: CalibrationDatasetConfig) -> datasets.Dataset: - """Load dataset from calibration dataset config.""" - data_path = data_cfg.path_or_name - data_split = data_cfg.split - - try: - if os.path.exists(data_path): - dataset = datasets.load_dataset( - data_cfg.format, - data_files=data_path, - split=data_split, - ) - else: - data_name_parts = data_path.split(":") - if len(data_name_parts) == 1: - dataset = datasets.load_dataset(data_path, split=data_split) - elif len(data_name_parts) == 2: - data_name, subset_name = data_name_parts - dataset = datasets.load_dataset( - data_name, subset_name, split=data_split - ) - else: - raise InvalidConfigError( - "Dataset name is in invalid format. " - "(valid format: '' or ':')" - ) - except ValueError as err: - raise QuantizationError(f"datasets.load_dataset failed. {str(err)}") from err - - if not isinstance(dataset, datasets.Dataset): - raise InvalidConfigError( - "This dataset format is not supported for the calibration." - ) - - return dataset - - -T = TypeVar("T") - - -def batched(it: Iterator[T], n: int) -> Iterator[List[T]]: - """Batch an iterator into lists of size n.""" - # batched('ABCDEFG', 3) --> ABC DEF G - while True: - batch = list(islice(it, n)) - if not batch: - return - yield batch - - -def build_percentile_statistics( - scale_percentile: float, - symmetric: bool = True, -) -> Tuple[Callable, Callable, Callable]: - """Builds the hooks for getting the max input and output activations of a model.""" - logger.info( - "Building percentile statistics hooks. 
scale_percentile: (%s)", - scale_percentile, - ) - - max_input_M1: Dict[str, torch.Tensor] = {} - max_input_M2: Dict[str, torch.Tensor] = {} - max_input_num: Dict[str, torch.Tensor] = {} - max_output_M1: Dict[str, torch.Tensor] = {} - max_output_M2: Dict[str, torch.Tensor] = {} - max_output_num: Dict[str, torch.Tensor] = {} - - def create_hook(name: ModuleName): - def update_stats( - max_M1: Dict[str, torch.Tensor], - max_M2: Dict[str, torch.Tensor], - max_num: Dict[str, int], - new_t: torch.Tensor, - ) -> None: - # Chan's method for computing mean and variance incrementally - new_t = new_t.detach().reshape(-1, new_t.size(-1)) - new_numel = new_t.size(0) - new_t_M1 = new_t.to(torch.float64).mean(dim=0) - if symmetric: - # it is assumed samples are always centered on zero - # in the symmetric quantization scheme - new_t_M1.zero_() - new_t_M2 = ((new_t.to(torch.float64) - new_t_M1) ** 2).sum(dim=0) - try: - pre_numel = max_num[name] - max_num[name] += new_numel - delta = new_t_M1 - max_M1[name] - max_M1[name] += delta * (new_numel / max_num[name]) - max_M2[name] += new_t_M2 + torch.pow(delta, 2) * ( - pre_numel * new_numel / max_num[name] - ) - except KeyError: - max_num[name] = new_numel - max_M1[name] = new_t_M1 - max_M2[name] = new_t_M2 - - def hook(module, in_t_tup, out_t): # pylint: disable=unused-argument - with torch.no_grad(): - in_t = in_t_tup[0] - update_stats(max_input_M1, max_input_M2, max_input_num, in_t) - update_stats(max_output_M1, max_output_M2, max_output_num, out_t) - - return hook - - def finish_input_stats(): - return { - name: torch.distributions.Normal( - loc=max_input_M1[name], - scale=torch.sqrt(max_input_M2[name] / max_input_num[name]).clip( - min=1e-7 - ), - ).icdf( - torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to( - max_input_M1[name].device - ) - ) - for name in list(max_input_M1.keys()) - } - - def finish_output_stats(): - return { - name: torch.distributions.Normal( - loc=max_output_M1[name], - scale=torch.sqrt(max_output_M2[name] / max_output_num[name]).clip( - min=1e-7 - ), - ).icdf( - torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to( - max_output_M1[name].device - ) - ) - for name in list(max_output_M1.keys()) - } - - return finish_input_stats, finish_output_stats, create_hook - - -def build_max_statistics() -> Tuple[Callable, Callable, Callable]: - """Builds the hooks for getting the max input and output activations of a model.""" - logger.info("Building max statistics hooks") - max_input_stats: Dict[str, torch.Tensor] = {} - max_output_stats: Dict[str, torch.Tensor] = {} - - def create_hook(name: ModuleName): - def hook(modules, in_t_tup, out_t): # pylint: disable=unused-argument - in_t = in_t_tup[0] - in_t = ( - in_t.detach().abs().reshape(-1, in_t.size(-1)).max(dim=0).values - ) # reduce-max only leaving the hidden dim (supposing the last dim is the hidden dim) - out_t = out_t.detach().reshape(-1, out_t.size(-1)) - out_t = out_t.abs().max(dim=0).values - try: - max_input_stats[name] = torch.maximum(max_input_stats[name], in_t) - except KeyError: - max_input_stats[name] = in_t - try: - max_output_stats[name] = torch.maximum(max_output_stats[name], out_t) - except KeyError: - max_output_stats[name] = out_t - - return hook - - def finish_input_stats(): - return max_input_stats - - def finish_output_stats(): - return max_output_stats - - return finish_input_stats, finish_output_stats, create_hook - - -@torch.no_grad() -def collect_stats( - model: torch.nn.Module, - device: str, - dataset: datasets.Dataset, - target_classes: 
Tuple[Type[torch.nn.Module], ...], - tqdm_desc: str, - percentile: float, - batch_size: int = 1, -) -> Tuple[Dict[ModuleName, torch.Tensor], Dict[ModuleName, torch.Tensor]]: - """Collects the maximum values of input and output activations of a specific model. - - Args: - model (torch.nn.Module): The model for which we want to collect the max statistics. - dataset (Dataset): Dataset that contains input tensors. - target_classes (Tuple[Type[torch.nn.Module], ...]): A tuple of the target classes. - - Returns: - A tuple of two dictionaries: (max_input_stats, max_output_stats), where: - max_input_stats: The maximum input activation values for each module of the model. - max_output_stats: The maximum output activation values for each module of the model. - - This function uses a forward hook to capture the maximum input and output activation values - of the specified target_classes. The max_batch_size parameter controls the size of the input - batches that are passed through the model. - - The function returns two dictionaries containing the maximum input and output activation - values for each module of the model, respectively. These dictionaries can be used to calculate - scaling factors for weight quantization and activation smoothing. - - """ - # pylint: disable=too-many-locals - max_input_stats, max_output_stats, create_hook = ( - build_percentile_statistics(percentile) - if percentile < 100.0 - else build_max_statistics() - ) - name_mods = [ - (name, module) - for name, module in model.named_modules() - if isinstance(module, target_classes) - ] - - calib_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False) - removables = [] - for name, module in name_mods: - removables.append(module.register_forward_hook(create_hook(name))) - try: - for inputs in tqdm(calib_dataloader, desc=tqdm_desc): - model(inputs.to(device)) - finally: - for removable in removables: - removable.remove() - return max_input_stats(), max_output_stats() - - -def build_inps_hook(): - """Builds the hooks for getting the input and output activations of a module.""" - args_dict = {} - kwargs_dict = {} - - def create_hook(name: ModuleName): - def hook(m, args, kwargs, y): # pylint: disable=unused-argument - assert name not in args_dict - assert name not in kwargs_dict - # assumption: all positional arguments are torch.Tensor - args_dict[name] = [t.detach() for t in args] - kwargs_dict[name] = { - k: (v.detach() if isinstance(v, torch.Tensor) else v) - for k, v in kwargs.items() - } - - return hook - - return args_dict, kwargs_dict, create_hook - - -def collect_inps( - module: torch.nn.Module, - module_args: Tuple[Any, ...], - module_kwargs: Dict[str, Any], - device: str, - target_classes: Tuple[Type[torch.nn.Module], ...], -) -> Tuple[Dict[ModuleName, Tuple[Any]], Dict[ModuleName, Dict[str, Any]]]: - """Collects concated input and output activations of a specific module.""" - args_dict, kwargs_dict, create_hook = build_inps_hook() - name_mods = [ - (name, m) for name, m in module.named_modules() if isinstance(m, target_classes) - ] - - removables = [] - for name, m in name_mods: - removables.append(m.register_forward_hook(create_hook(name), with_kwargs=True)) - - module( - *((t.to(device) if isinstance(t, torch.Tensor) else t) for t in module_args), - **{ - k: (v.to(device) if isinstance(v, torch.Tensor) else v) - for k, v in module_kwargs.items() - }, - ) - - for removable in removables: - removable.remove() - - return args_dict, kwargs_dict - - -def get_torch_quant_dtype(q_bit: int = 8): - """Get torch quant 
data type from quant bit.""" - if q_bit == 8: - return torch.int8 - if q_bit == 4: - return torch.int32 # In AWQ, we use int32 to represent int4 - raise ValueError(f"Invalid quant bit: {q_bit}") - - -@torch.no_grad() -def get_weight_act_quant_scales( - layer_name: str, - input_max: torch.Tensor, - target_weight: torch.Tensor, - weight: torch.Tensor, - output_max: torch.Tensor, - device: str = "cpu", - quant_dtype: ModelDataType = ModelDataType.INT8, -) -> WeightActQuantResult: - """Get the quantization scales and int8 weight for a specific layer.""" - # shape of input_max: [InChannels] - # shape of output_max: [OutChannels] - # shape of target_weight: [OutChannels, InChannels] - assert input_max.ndim == 1 - assert output_max.ndim == 1 - - assert quant_dtype == ModelDataType.INT8 - - in_channels = input_max.size(0) - out_channels = output_max.size(0) - assert tuple(weight.size()) == (out_channels, in_channels) - - max_val = 2 ** (8 - 1) - 1 - min_val = -(2 ** (8 - 1)) - - act_scale = float(input_max.detach().abs().max().item()) / float(max_val) - weight_scale = float(target_weight.detach().abs().max().item()) / float(max_val) - - q_weight = ( - (weight.detach().float() / weight_scale) - .round() - .clip(min_val, max_val) - .to(get_torch_quant_dtype(8)) - .to(device) - ) - - return WeightActQuantResult( - layer_name, - quant_dtype=quant_dtype, - zero_point=torch.tensor(0.0), - act_scale=torch.tensor(act_scale), - weight_scale=torch.tensor(weight_scale), - q_weight=q_weight, - q_group_size=-1, - ) - - -def get_weight_only_quant_scales( - w: torch.Tensor, - q_bit: int, - q_group_size: int, - layer_name: str = "", - device: Union[str, torch.device] = "cpu", -) -> WeightOnlyQuantResult: - """Return the quantization scales of weight for a specific layer.""" - assert q_bit in [4, 8] - org_w_shape = w.shape # [OutDim, InDim] - - w = w.reshape(-1, q_group_size) # [OutDim x num_groups, group_size] - max_val = w.amax(dim=1, keepdim=True) - min_val = w.amin(dim=1, keepdim=True) - - max_int = 2**q_bit - 1 - min_int = 0 - - scales = (max_val - min_val).clamp(min=1e-5) / max_int - zeros = (-torch.round(min_val / scales)).clamp_(min_int, max_int) - - assert torch.isnan(scales).sum() == 0 - - q_weight = torch.clamp(torch.round(w / scales) + zeros, min_int, max_int) - q_weight = q_weight.reshape(org_w_shape).detach().to(device) - scales = ( - scales.view(org_w_shape[0], -1).transpose(0, 1).detach().to(device) - ) # [OutDim, num_groups] - zeros = ( - zeros.view(org_w_shape[0], -1).transpose(0, 1).detach().to(device) - ) # [OutDim, num_groups] - - assert torch.isnan(q_weight).sum() == 0 - - return WeightOnlyQuantResult( - layer_name, - quant_dtype=ModelDataType.INT4 if q_bit == 4 else ModelDataType.INT8, - zero_point=zeros, - q_group_size=q_group_size, - weight_scale=scales, - q_weight=q_weight, - ) - - -def send_model_to_device( - model: torch.nn.Module, - device: Union[str, torch.device], - *, - exclude: Iterable[torch.nn.Module] = (), -): - """Send the model and its submodules onto device except for modules designated by `exclude`.""" - exclude_set = set(exclude) - - @torch.no_grad() - def recurse(m: torch.nn.Module): - if m in exclude_set: - return - for name, p in list(m.named_parameters(recurse=False)): - m.register_parameter(name, torch.nn.Parameter(p.to(device))) - for name, b in list(m.named_buffers(recurse=False)): - m.register_buffer(name, b.to(device)) - - for child in m.children(): - recurse(child) - - recurse(model) - - -class RemovableOffloaderHook(Protocol): - """Hook protocol for cpu 
offloader.""" - - def offload(self) -> None: - """Offload the associated block onto CPU.""" - - def remove(self) -> None: - """Remove this hook.""" - - -@contextmanager -def offload_module_sequence( - blocks: Sequence[torch.nn.Module], device: Union[str, torch.device] -): - """Offload a sequence of torch modules automatically. - - In the beginning, all blocks are supposed to reside on CPU. - When i-th block is called, it is loaded onto `device` on the fly. - And at the same time, it offloads (i-1)-th block back to CPU. - """ - module_hooks: List[RemovableOffloaderHook] = [] - if blocks: - prev_module_hook = None - for tf_block in blocks: - _, module_hook = cpu_offload_with_hook( - tf_block, device, prev_module_hook=prev_module_hook - ) - prev_module_hook = module_hook - module_hooks.append(module_hook) - try: - yield - finally: - for hook in module_hooks: - hook.offload() - for hook in module_hooks: - hook.remove() diff --git a/friendli/modules/quantizer_v2/__init__.py b/friendli/modules/quantizer_v2/__init__.py deleted file mode 100644 index 9ee5a33d..00000000 --- a/friendli/modules/quantizer_v2/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantizer V2.""" diff --git a/friendli/modules/quantizer_v2/base.py b/friendli/modules/quantizer_v2/base.py deleted file mode 100644 index 08c48f2d..00000000 --- a/friendli/modules/quantizer_v2/base.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantization Interface.""" - -from __future__ import annotations - -import os -from abc import ABC, abstractmethod -from contextlib import contextmanager -from typing import Any, Dict, Iterator, List, Tuple, Type - -import huggingface_hub # type: ignore -import torch -from torch.utils.data import DataLoader -from tqdm import tqdm -from transformers import PretrainedConfig, PreTrainedModel # type: ignore - -from friendli.errors import NotSupportedQuantConfigError -from friendli.logging import logger -from friendli.modules.quantizer_v2.enums import QuantDatasetFormat -from friendli.modules.quantizer_v2.layers import ( - WeightActQuantizedLinearLayer, - WeightOnlyQuantizedLinearLayer, -) -from friendli.modules.quantizer_v2.schema.config import OneOfQuantConfig -from friendli.modules.quantizer_v2.schema.data import TFQuantInputs -from friendli.modules.quantizer_v2.utils import ( - collect_stats, - get_weight_act_quant_scales, - get_weight_only_quant_scales, - offload_module_sequence, - send_model_to_device, -) - - -class AbstractQuantHookV2(ABC): - """Abstract Quantization Hook for a specific model.""" - - def __init__(self, quant_config: OneOfQuantConfig, model_config: PretrainedConfig): - """Initialize the Quantization Hook. - - Args: - quant_config (OneOfQuantConfig): Quantization configuration. - model_config (PretrainedConfig): Model configuration. 
- """ - self.quant_config = quant_config - self.model_config = model_config - - @abstractmethod - def check_model_config(self) -> None: - """Check if the model is quantizable.""" - - @abstractmethod - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module], ...]: - """Get linear layer types in the model.""" - - @abstractmethod - def get_tf_blocks(self, model: PreTrainedModel) -> List[torch.nn.Module]: - """Get tensor fusion blocks in the model.""" - - @abstractmethod - def iter_tf_quant_inputs(self, model: PreTrainedModel) -> Iterator[TFQuantInputs]: - """Iterate over TFQuantInputs.""" - - @property - @abstractmethod - def quantized_layer_prefix(self) -> str: - """Returns the prefix of the transformer block name.""" - - -class AbstractQuantizerV2(ABC): - """Abstract class for quantizer.""" - - def __init__(self, hook: AbstractQuantHookV2, config: OneOfQuantConfig): - """Initialize AbstractQuantizer.""" - self.config = config - self.hook = hook - - def check_config(self) -> None: - """Check if the model is quantizable.""" - self.hook.check_model_config() - calibration_dataset_config = self.config.calibration_dataset - data_path_or_name = calibration_dataset_config.path_or_name - percentile = self.config.percentile - if percentile <= 0 or percentile > 100: - raise NotSupportedQuantConfigError( - invalid_option=str(percentile), - valid_options=["0 < percentile <= 100"], - ) - if not os.path.exists(data_path_or_name): - data_name = data_path_or_name.split(":")[0] - if data_name not in ( - data.id for data in huggingface_hub.list_datasets(search=data_name) - ): - raise NotSupportedQuantConfigError( - invalid_option=data_name, - valid_options=["datasets on the huggingface hub", "local path"], - ) - else: - if calibration_dataset_config.format not in QuantDatasetFormat: - raise NotSupportedQuantConfigError( - invalid_option=calibration_dataset_config.format, - valid_options=list(QuantDatasetFormat), - ) - try: - torch.device(self.config.device) - except ValueError as err: - raise NotSupportedQuantConfigError( - invalid_option=self.config.device, - valid_options=["cpu", "cuda"], - ) from err - - @contextmanager - def _try_offload_model(self, model: PreTrainedModel): - if not self.config.offload: - logger.info("Offloading not enabled. 
Skipping.") - model.to(self.config.device) - yield - else: - logger.info("Offloading enabled.") - tf_blocks = self.hook.get_tf_blocks(model) - send_model_to_device(model, self.config.device, exclude=tf_blocks) - with offload_module_sequence(tf_blocks, self.config.device): - yield - - @abstractmethod - def quantize(self, model: PreTrainedModel) -> PreTrainedModel: - """Quantize model.""" - - def pre_quantize(self, model: PreTrainedModel) -> PreTrainedModel: - """Preprocess model before quantization.""" - - def post_quantize(self, model: PreTrainedModel) -> PreTrainedModel: - """Postprocess model after quantization.""" - - @abstractmethod - def get_quant_config(self) -> Dict[str, Any]: - """Get quantizer config.""" - - -class AbstractWeightOnlyQuantizer(AbstractQuantizerV2): - """Abstract class for weight only quantizer.""" - - def quantize(self, model: PreTrainedModel) -> PreTrainedModel: - """Return quantized model.""" - with self._try_offload_model(model): - for tf_quant_inputs in tqdm( - self.hook.iter_tf_quant_inputs(model), - total=len(self.hook.get_tf_blocks(model)), - desc="Quantize model..", - ): - for quant_input in tf_quant_inputs.quant_inputs: - parent_module, local_names, names = ( - quant_input.parent_module, - quant_input.local_names, - quant_input.target_names, - ) - parent_modules_w_local_name = [] - if isinstance(parent_module, torch.nn.ModuleList): - # For MoE models with seperate expert layers - for p_module in parent_module: - for local_name in local_names: - parent_modules_w_local_name.append( - (p_module, local_name) - ) - else: - assert isinstance(parent_module, torch.nn.Module) - for local_name in local_names: - parent_modules_w_local_name.append( - (parent_module, local_name) - ) - layers = [ - p_module.get_submodule(local_name) - for p_module, local_name in parent_modules_w_local_name - ] - assert self.config.quant_scale_dtype - quant_results = get_weight_only_quant_scales( - model, - names, - quant_dtype=self.config.quant_dtype, - quant_scale_dtype=self.config.quant_scale_dtype, - q_group_size=self.config.quant_group_size, - use_symmetric=self.config.use_symmetric, - ) - q_layers = [ - WeightOnlyQuantizedLinearLayer.from_layer(layer, quant_result) - for layer, quant_result in zip(layers, quant_results) - ] - for (p_module, local_name), q_layer in zip( - parent_modules_w_local_name, q_layers - ): - setattr(p_module, local_name, q_layer) - return model - - -class AbstractWeightActQuantizer(AbstractQuantizerV2): - """Abstract class for weight and activation quantizer.""" - - @abstractmethod - def get_calib_dataloader(self) -> DataLoader: - """Get encoded calibration dataset.""" - - def quantize(self, model: PreTrainedModel) -> PreTrainedModel: - """Return quantized model.""" - with self._try_offload_model(model): - max_input_stats, _ = collect_stats( - model, - self.config.device, - self.get_calib_dataloader(), - self.hook.get_linear_layer_types(), - percentile=self.config.percentile, - tqdm_desc="Collecting stats for Static Quantization.", - ) - for tf_quant_inputs in tqdm( - self.hook.iter_tf_quant_inputs(model), - total=len(self.hook.get_tf_blocks(model)), - desc="Quantize model..", - ): - for quant_input in tf_quant_inputs.quant_inputs: - parent_module, local_names, names = ( - quant_input.parent_module, - quant_input.local_names, - quant_input.target_names, - ) - parent_modules_w_local_name = [] - if isinstance(parent_module, torch.nn.ModuleList): - # For MoE models with seperate expert layers - for p_module in parent_module: - for local_name in local_names: - 
parent_modules_w_local_name.append( - (p_module, local_name) - ) - else: - assert isinstance(parent_module, torch.nn.Module) - for local_name in local_names: - parent_modules_w_local_name.append((p_module, local_name)) - layers = [ - p_module.get_submodule(local_name) - for p_module, local_name in parent_modules_w_local_name - ] - assert self.config.quant_scale_dtype - quant_results = get_weight_act_quant_scales( - model, - names, - max_input_stats, - quant_scale_dtype=self.config.quant_scale_dtype, - quant_dtype=self.config.quant_dtype, - ) - q_layers = [ - WeightActQuantizedLinearLayer.from_layer(layer, quant_result) - for layer, quant_result in zip(layers, quant_results) - ] - for (p_module, local_name), q_layer in zip( - parent_modules_w_local_name, q_layers - ): - setattr(p_module, local_name, q_layer) - return model diff --git a/friendli/modules/quantizer_v2/enums.py b/friendli/modules/quantizer_v2/enums.py deleted file mode 100644 index 18bc60c7..00000000 --- a/friendli/modules/quantizer_v2/enums.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantizer Enums.""" - - -from __future__ import annotations - -from enum import Enum - - -class QuantMode(str, Enum): - """Supported quantization modes.""" - - INT8 = "int8" - DUMMY = "dummy" - - -class QuantDatasetFormat(str, Enum): - """Supported file format for calibration datasets for quantization.""" - - JSON = "json" - CSV = "csv" - PARQUET = "parquet" - TXT = "txt" - - -class Int8QuantType(str, Enum): - """Int8Quant modes.""" - - DYNAMIC = "dynamic" - - -class ModelDataType(str, Enum): - """Model dtype enums.""" - - BF16 = "bf16" - FP16 = "fp16" - FP32 = "fp32" - FP8_E4M3 = "fp8_e4m3" - INT8 = "int8" - INT4 = "int4" diff --git a/friendli/modules/quantizer_v2/int8/__init__.py b/friendli/modules/quantizer_v2/int8/__init__.py deleted file mode 100644 index 9f651b15..00000000 --- a/friendli/modules/quantizer_v2/int8/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Int8 Quantizer.""" diff --git a/friendli/modules/quantizer_v2/int8/base.py b/friendli/modules/quantizer_v2/int8/base.py deleted file mode 100644 index 66e200a8..00000000 --- a/friendli/modules/quantizer_v2/int8/base.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Int8 Quantizer Base.""" - -from __future__ import annotations - -from abc import abstractmethod -from typing import Any, Dict, Iterator, List, Tuple, cast - -import torch -from torch.utils.data import DataLoader -from transformers import PreTrainedModel # type: ignore - -from friendli.modules.converter.utils import get_tokenizer -from friendli.modules.quantizer_v2.base import ( - AbstractQuantHookV2, - AbstractQuantizerV2, - AbstractWeightActQuantizer, - AbstractWeightOnlyQuantizer, -) -from friendli.modules.quantizer_v2.int8.utils import perform_smoothing -from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig -from friendli.modules.quantizer_v2.schema.data import ModuleName -from friendli.modules.quantizer_v2.utils import collect_stats, safe_load_datasets - - -class Int8QuantHook(AbstractQuantHookV2): - """Int8 Quant Hook Base.""" - - @abstractmethod - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the attention fc layer in the decoder block.""" - - @abstractmethod - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Returns the second feed-forward layer in the decoder block.""" - - @abstractmethod - def iter_pre_act_post_act_params( - self, model: PreTrainedModel - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Returns iterator of pre_act_params and post_act_params per transformer block.""" - - -class Int8Quantizer(AbstractQuantizerV2): - """Int8 Quantizer Base.""" - - def get_smoothing_calib_dataloader(self) -> DataLoader: - """Get calibration dataset for Int8.""" - data_cfg = self.config.calibration_dataset - dataset = safe_load_datasets(data_cfg) - tokenizer = get_tokenizer(self.hook.model_config.name_or_path) - dataset = ( - dataset.shuffle(self.config.seed) - .select(range(data_cfg.num_samples)) - .select_columns([data_cfg.lookup_column_name]) - ) - encoded_dataset = tokenizer( - dataset[data_cfg.lookup_column_name], - return_tensors="pt", - truncation=True, - padding=True, - max_length=data_cfg.max_length, - ) - return DataLoader(encoded_dataset["input_ids"], batch_size=data_cfg.batch_size) - - def _smooth( - self, - model: PreTrainedModel, - ) -> None: - """Smooths the models before Quantization.""" - model.eval() - # collect stats for Int8 quantization scale. 
- with self._try_offload_model(model): - calib_dataloader = self.get_smoothing_calib_dataloader() - quant_config = cast(Int8QuantConfig, self.config) - max_input_stats, _ = collect_stats( - model, - quant_config.device, - calib_dataloader, - self.hook.get_linear_layer_types(), - tqdm_desc="Collecting stats for Smoothing.", - percentile=100.0, - ) - - for pre_act_params, post_act_params, name in cast( - Int8QuantHook, self.hook - ).iter_pre_act_post_act_params(model): - perform_smoothing( - pre_act_params, - post_act_params, - max_input_stats[name], - migration_strength=quant_config.int8_args.migration_strength, - inplace=True, - ) - - def pre_quantize( - self, - model: PreTrainedModel, - ) -> None: - """Pre-procedure that should be called before quantize() is called.""" - self._smooth(model) - - def quantize(self, model: PreTrainedModel) -> torch.nn.Module: - """Quantize the model.""" - self.pre_quantize(model) - return super().quantize(model) - - def get_quant_config(self) -> Dict[str, Any]: - """Get the quantization configuration.""" - return { - "bits": 8, - "mode": cast(Int8QuantConfig, self.config).int8_args.quant_type.value, - "zero_point": False, - "quant_method": "int8", - "quant_group_size": self.config.quant_group_size, - } - - -class Int8StaticQuantizer(Int8Quantizer, AbstractWeightActQuantizer): - """Int8 Static Quantizer Base.""" - - -class Int8DynamicQuantizer(Int8Quantizer, AbstractWeightOnlyQuantizer): - """Int8 Dynamic Quantizer Base.""" diff --git a/friendli/modules/quantizer_v2/int8/utils.py b/friendli/modules/quantizer_v2/int8/utils.py deleted file mode 100644 index c482f87d..00000000 --- a/friendli/modules/quantizer_v2/int8/utils.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Int8 Quantizer Base.""" - -from __future__ import annotations - -from typing import List, Tuple - -import torch - - -@torch.no_grad() -def perform_smoothing( - pre_act_params: List[torch.Tensor], - post_act_params: List[torch.Tensor], - activation_max: torch.Tensor, - *, - migration_strength: float = 0.5, - epsilon: float = 1e-5, - inplace: bool = False, -) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: - """Perform activation-weight smoothing in SmoothQuant. - - Performs the activation-weight smoothing scheme described in SmoothQuant - (Xiao et al., 2023), which migrates the amplitude of outliers from activations - to weights of matmul layers. The function takes in the following parameters: - - Args: - pre_act_params: torch.Tensors representing affine parameters - before each matmul layer. - post_act_params: torch.Tensors representing the weight matrices of the matmul layer. - activation_max: The maximum activation value of inputs of the matmul layer. - migration_strength: the strength of the activation migration. Default is 0.5. - epsilon: The epsilon used for numerical stability when calculating the scales. - Default is 1e-5. - - Returns: - A tuple of two lists of torch.Tensors: (smoothed_pre_act_params, smoothed_post_act_params) - - The function calculates "scales" as `pow(|Activation|, migration_strength) / - pow(|Weight|, 1-migration_strength)` and applies the smoothing effect into - a normalization layer that exists before every matmul layer. This is done because - it is more efficient than introducing a new smoothing layer before every matmul layer. - Fusing the smoothing effect into the normalization layer results in a faster and - more efficient implementation of the smoothing scheme. 
- - The function returns the smoothed normalization coefficients and the smoothed weight - matrices after the smoothing process. - """ - # shape of activation norms: [InChannels] - # shape of fc weights: [OutChannels, InChannels] - # shape of activation_max: [InChannels] - - # pylint: disable=too-many-locals - assert pre_act_params - assert post_act_params - - in_channels = pre_act_params[0].size(0) - device = pre_act_params[0].device - dtype = pre_act_params[0].dtype - - for pre_act_param in pre_act_params: - assert pre_act_param.device == device - assert pre_act_param.dtype == dtype - - for weight in post_act_params: - assert weight.ndim == 2 - assert weight.size(1) == in_channels, (weight.size(), in_channels) - assert weight.device == device - - activation_max = activation_max.to(device=device) - weight_max = post_act_params[0].abs().max(dim=0).values - for weight in post_act_params[1:]: - weight_max = torch.maximum(weight_max, weight.abs().max(dim=0).values) - - assert tuple(activation_max.size()) == (in_channels,) - assert tuple(weight_max.size()) == (in_channels,) - alpha = migration_strength - scales = ( - ( - activation_max.to(dtype=torch.float32).pow(alpha) - / weight_max.to(dtype=torch.float32).pow(1 - alpha) - ) - .clamp(min=epsilon) - .to(dtype=dtype) - ) - - scaled_pre_act_params = [act_norm / scales for act_norm in pre_act_params] - scaled_weights = [w * scales.view(1, -1) for w in post_act_params] - - if inplace: - for dst, src in zip(pre_act_params, scaled_pre_act_params): - dst.copy_(src) - for dst, src in zip(post_act_params, scaled_weights): - dst.copy_(src) - - return scaled_pre_act_params, scaled_weights diff --git a/friendli/modules/quantizer_v2/layers.py b/friendli/modules/quantizer_v2/layers.py deleted file mode 100644 index 3a203210..00000000 --- a/friendli/modules/quantizer_v2/layers.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Quantization Layers.""" - -from __future__ import annotations - -from typing import Optional, cast - -import torch - -from friendli.modules.quantizer_v2.schema.data import ( - WeightActQuantResult, - WeightOnlyQuantResult, -) - - -class WeightOnlyQuantizedLinearLayer(torch.nn.Module): - """Linear Layer with weight only quantization.""" - - def __init__( - self, - in_features: int, - out_features: int, - q_weight: torch.Tensor, - weight_scale: torch.Tensor, - zeros: Optional[torch.nn.Parameter] = None, - bias: Optional[torch.nn.Parameter] = None, - ): - """Initialize the Weight Only Quantized Linear Layer.""" - super().__init__() - self.in_features = in_features - self.out_features = out_features - self.weight_scale = torch.nn.Parameter(weight_scale) - self.weight = torch.nn.Parameter(q_weight, requires_grad=False) - self.register_parameter("zeros", zeros) - self.register_parameter("bias", bias) - - @staticmethod - def from_layer( - layer: torch.nn.Module, quant_result: WeightOnlyQuantResult - ) -> torch.nn.Module: - """Returns the quantized layer from the original layer.""" - zeros = ( - torch.nn.Parameter(quant_result.zero_point) - if quant_result.zero_point is not None - else None - ) - return WeightOnlyQuantizedLinearLayer( - cast(torch.nn.Linear, layer).in_features, - cast(torch.nn.Linear, layer).out_features, - quant_result.q_weight, - quant_result.weight_scale, - zeros, - cast(torch.nn.Linear, layer).bias, - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward pass with fake quantization. 
Not used in conversion.""" - raise NotImplementedError("Not used in conversion.") - - -class WeightActQuantizedLinearLayer(torch.nn.Module): - """Linear Layer with weight-act quantization.""" - - def __init__( # pylint: disable=too-many-arguments - self, - q_weight: torch.Tensor, - weight_scale: torch.Tensor, - act_scale: torch.Tensor, - bias: Optional[torch.nn.Parameter] = None, - ): - """Initialize the Weight Only Quantized Linear Layer.""" - super().__init__() - self.in_scale = torch.nn.Parameter(act_scale) - self.weight_scale = torch.nn.Parameter(weight_scale) - self.weight = torch.nn.Parameter(q_weight, requires_grad=False) - self.register_parameter("bias", bias) - - @staticmethod - def from_layer( - layer: torch.nn.Module, quant_result: WeightActQuantResult - ) -> torch.nn.Module: - """Returns the quantized layer from the original layer.""" - q_result = cast(WeightActQuantResult, quant_result) - return WeightActQuantizedLinearLayer( - q_result.q_weight, - q_result.weight_scale, - q_result.act_scale, - cast(torch.nn.Linear, layer).bias if hasattr(layer, "bias") else None, - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """Forward pass with fake quantization. Not used in conversion.""" - raise NotImplementedError("Not used in conversion.") diff --git a/friendli/modules/quantizer_v2/maps.py b/friendli/modules/quantizer_v2/maps.py deleted file mode 100644 index 48e972eb..00000000 --- a/friendli/modules/quantizer_v2/maps.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Quantizer V2 Maps.""" - -from __future__ import annotations - -from typing import Any, Dict, List, Tuple, Type, cast - -import transformers # type: ignore -from transformers import ( # type: ignore - LlamaForCausalLM, - MistralForCausalLM, - Phi3ForCausalLM, - PretrainedConfig, - PreTrainedModel, -) - -from friendli.errors import NotSupportedQuantModeError, QuantizationError -from friendli.modules.quantizer_v2.base import AbstractQuantizerV2 -from friendli.modules.quantizer_v2.enums import Int8QuantType, QuantMode -from friendli.modules.quantizer_v2.int8.base import Int8DynamicQuantizer, Int8QuantHook -from friendli.modules.quantizer_v2.models.llama import LlamaInt8QuantHook -from friendli.modules.quantizer_v2.models.phi3 import Phi3Int8QuantHook -from friendli.modules.quantizer_v2.schema.config import ( - Int8QuantConfig, - OneOfQuantConfig, -) - -model_arch_int8_hook_map: Dict[PreTrainedModel, type[Int8QuantHook]] = { - LlamaForCausalLM: LlamaInt8QuantHook, - MistralForCausalLM: LlamaInt8QuantHook, - Phi3ForCausalLM: Phi3Int8QuantHook, -} - - -def get_quanthook_map(quant_mode: QuantMode) -> Dict[Type[PreTrainedModel], Any]: - """Get quantizer map.""" - if quant_mode == QuantMode.INT8: - return model_arch_int8_hook_map - raise NotSupportedQuantModeError( - invalid_option=quant_mode, - valid_options=[e.value for e in QuantMode], - ) - - -def get_model_class(config: PretrainedConfig) -> PreTrainedModel: - """Get HuggingFace model architecture from config.""" - model_arch_list = cast(List[str], cast(PretrainedConfig, config).architectures) - if len(model_arch_list) == 0: - raise QuantizationError("Model architecture not found in config.") - model_arch = model_arch_list[0] - try: - cls_type = getattr(transformers, model_arch, None) - except AttributeError as exc: - raise QuantizationError(str(exc)) from exc - return cls_type - - -def get_quantizer_class(quant_config: OneOfQuantConfig) -> Type[AbstractQuantizerV2]: - """Get quantizer class.""" - 
quant_mode = quant_config.mode - if quant_mode == QuantMode.INT8: - if ( - cast(Int8QuantConfig, quant_config).int8_args.quant_type - == Int8QuantType.DYNAMIC - ): - return Int8DynamicQuantizer - raise QuantizationError( - "Only Dynamic quantization is supported for int8 quantization." - ) - raise NotSupportedQuantModeError( - invalid_option=quant_mode, - valid_options=[e.value for e in QuantMode], - ) - - -def get_hf_quantizer_factory( - model_config: PretrainedConfig, - quant_config: OneOfQuantConfig, -) -> Tuple[PreTrainedModel, AbstractQuantizerV2]: - """Get quantizer for specific model architecture with quant mode and args.""" - hf_model_cls = get_model_class(model_config) - quantizer = get_quantizer_class(quant_config) - quanthook_map = get_quanthook_map(quant_config.mode) - quanthook = quanthook_map[hf_model_cls](quant_config, model_config) - return hf_model_cls, quantizer(quanthook, quant_config) diff --git a/friendli/modules/quantizer_v2/models/llama.py b/friendli/modules/quantizer_v2/models/llama.py deleted file mode 100644 index 649d8471..00000000 --- a/friendli/modules/quantizer_v2/models/llama.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli LlamaForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type, cast - -import torch -from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedModel - -from friendli.errors import NotSupportedCheckpointError, QuantizationError -from friendli.modules.quantizer_v2.base import AbstractQuantHookV2 -from friendli.modules.quantizer_v2.int8.base import Int8QuantHook -from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig -from friendli.modules.quantizer_v2.schema.data import ( - ModuleName, - QuantInput, - TFQuantInputs, -) - - -class LlamaQuantHook(AbstractQuantHookV2): - """BaseQuantHook for LlamaForCausalLM.""" - - def check_model_config(self) -> None: - """Check if LLaMA architectures' config can be converted to Friendli format.""" - try: - if cast(LlamaConfig, self.model_config).hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(LlamaConfig, self.model_config).hidden_act}'", - valid_options=["silu"], - ) - if cast(LlamaConfig, self.model_config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if cast(LlamaConfig, self.model_config).rms_norm_eps not in (1e-5, 1e-6): - raise NotSupportedCheckpointError( - invalid_option=f"'rms_norm_eps={cast(LlamaConfig, self.model_config).rms_norm_eps}'", - valid_options=[1e-5, 1e-6], - ) - except AttributeError as exc: - raise QuantizationError(str(exc)) from exc - - def get_tf_blocks(self, model: PreTrainedModel) -> List[torch.nn.Module]: - """Return the transformer blocks in LlamaForCausalLM.""" - return model.model.layers - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Return the linear layer types in LlamaForCausalLM.""" - return (torch.nn.Linear,) - - @property - def quantized_layer_prefix(self) -> str: - """The layer name prefix used before LLaMA's transformer block number.""" - return "model.layers." 
- - -class LlamaInt8QuantHook(LlamaQuantHook, Int8QuantHook): - """Int8QuantHook for LlamaForCausalLM.""" - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Return the linear layer after attention in the decoder layer.""" - return decoder_layer.self_attn.o_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Return the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.down_proj - - def iter_pre_act_post_act_params( - self, - model: LlamaForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Return iterator of layernorm's weight and linear layer's weight per transformer block in LlamaForCausalLM.""" - - for index, decoder_layer in enumerate(model.model.layers): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.input_layernorm.weight.data, - ], - [ - decoder_layer.self_attn.q_proj.weight.data, - decoder_layer.self_attn.k_proj.weight.data, - decoder_layer.self_attn.v_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", # the input tensors fed into Q, K, V matrices are identical. - ) - # [LayerNorm 2] - [ MLP FF 1, MLP FF GATE ] gets smoothed - yield ( - [ - decoder_layer.post_attention_layernorm.weight.data, - ], - [ - decoder_layer.mlp.up_proj.weight.data, - decoder_layer.mlp.gate_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.mlp.up_proj", - ) - - def iter_tf_quant_inputs(self, model: PreTrainedModel) -> Iterator[TFQuantInputs]: - """Return the layers which should be quantized in transformer block of LlamaForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.self_attn - mlp = decoder_layer.mlp - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - QuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", - ], - local_names=["q_proj"], - ), - QuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.k_proj", - ], - local_names=["k_proj"], - ), - QuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.v_proj", - ], - local_names=["v_proj"], - ), - QuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - ], - local_names=[ - "o_proj", - ], - ), - QuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.up_proj", - ], - local_names=["up_proj"], - ), - QuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.gate_proj", - ], - local_names=["gate_proj"], - ), - QuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.down_proj" - ], - local_names=["down_proj"], - ), - ], - ) diff --git a/friendli/modules/quantizer_v2/models/phi3.py b/friendli/modules/quantizer_v2/models/phi3.py deleted file mode 100644 index 0fdc095f..00000000 --- a/friendli/modules/quantizer_v2/models/phi3.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Phi3ForCausalLM QuantizerHook.""" - -# mypy: ignore-errors - -from __future__ import annotations - -from typing import Iterator, List, Tuple, Type, cast - -import torch -from transformers import Phi3Config, Phi3ForCausalLM, PreTrainedModel - -from friendli.errors import NotSupportedCheckpointError, QuantizationError -from friendli.modules.quantizer_v2.base import AbstractQuantHookV2 -from friendli.modules.quantizer_v2.int8.base import Int8QuantHook -from friendli.modules.quantizer_v2.schema.data import ( - ModuleName, - QuantInput, - TFQuantInputs, -) - - -class Phi3QuantHook(AbstractQuantHookV2): - """BaseQuantHook for Phi3ForCausalLM.""" - - def check_model_config(self) -> None: - """Check if Phi3 architectures' config can be converted to Friendli format.""" - try: - if cast(Phi3Config, self.model_config).hidden_act not in ["silu"]: - raise NotSupportedCheckpointError( - invalid_option=f"'hidden_act={cast(Phi3Config, self.model_config).hidden_act}'", - valid_options=["silu"], - ) - if cast(Phi3Config, self.model_config).tie_word_embeddings: - raise NotSupportedCheckpointError( - invalid_option="'tie_word_embeddings=True'", - valid_options=[False], - ) - if cast(Phi3Config, self.model_config).rms_norm_eps not in (1e-5, 1e-6): - raise NotSupportedCheckpointError( - invalid_option=f"'rms_norm_eps={cast(Phi3Config, self.model_config).rms_norm_eps}'", - valid_options=[1e-5, 1e-6], - ) - except AttributeError as exc: - raise QuantizationError(str(exc)) from exc - - def get_tf_blocks(self, model: PreTrainedModel) -> List[torch.nn.Module]: - """Return the transformer blocks in Phi3ForCausalLM.""" - return model.model.layers - - def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]: - """Return the linear layer types in Phi3ForCausalLM.""" - return (torch.nn.Linear,) - - @property - def quantized_layer_prefix(self) -> str: - """The layer name prefix used before Phi3's transformer block number.""" - return "model.layers." 
- - -class Phi3Int8QuantHook(Phi3QuantHook, Int8QuantHook): - """Int8QuantHook for Phi3ForCausalLM.""" - - def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Return the linear layer after attention in the decoder layer.""" - return decoder_layer.self_attn.o_proj - - def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear: - """Return the linear layer after FF1 in the decoder layer.""" - return decoder_layer.mlp.down_proj - - def iter_pre_act_post_act_params( - self, - model: Phi3ForCausalLM, - ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]: - """Return iterator of layernorm's weight and linear layer's weight per transformer block in Phi3ForCausalLM.""" - - for index, decoder_layer in enumerate(model.model.layers): # type: ignore[union-attr] - # [LayerNorm 1] - [ QKV projection ] gets smoothed - yield ( - [ - decoder_layer.input_layernorm.weight.data, - ], - [ - decoder_layer.self_attn.qkv_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.self_attn.qkv_proj", - ) - # [LayerNorm 2] - [ MLP FF 1, MLP FF GATE ] gets smoothed - yield ( - [ - decoder_layer.post_attention_layernorm.weight.data, - ], - [ - decoder_layer.mlp.gate_up_proj.weight.data, - ], - f"{self.quantized_layer_prefix}{index}.mlp.gate_up_proj", - ) - - def iter_tf_quant_inputs(self, model: PreTrainedModel) -> Iterator[TFQuantInputs]: - """Return the layers which should be quantized in transformer block of Phi3ForCausalLM.""" - for index, decoder_layer in enumerate( - self.get_tf_blocks(model) # type: ignore[union-attr, arg-type] - ): - self_attn = decoder_layer.self_attn - mlp = decoder_layer.mlp - - yield TFQuantInputs( - layer_index=index, - block=decoder_layer, - quant_inputs=[ - QuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.qkv_proj", - ], - local_names=["qkv_proj"], - ), - QuantInput( - parent_module=self_attn, - target_names=[ - f"{self.quantized_layer_prefix}{index}.self_attn.o_proj", - ], - local_names=[ - "o_proj", - ], - ), - QuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.gate_up_proj", - ], - local_names=["gate_up_proj"], - ), - QuantInput( - parent_module=mlp, - target_names=[ - f"{self.quantized_layer_prefix}{index}.mlp.down_proj" - ], - local_names=["down_proj"], - ), - ], - ) diff --git a/friendli/modules/quantizer_v2/quantize.py b/friendli/modules/quantizer_v2/quantize.py deleted file mode 100644 index 8187db5f..00000000 --- a/friendli/modules/quantizer_v2/quantize.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Converter.""" - -from __future__ import annotations - -from typing import Optional - -from friendli.errors import TokenizerNotFoundError -from friendli.logging import logger -from friendli.modules.quantizer_v2.maps import get_hf_quantizer_factory -from friendli.modules.quantizer_v2.schema.config import OneOfQuantConfig -from friendli.modules.quantizer_v2.utils import ( - get_model_dtype, - get_model_pretrained_config, - save_tokenizer, -) - - -def quantize_checkpoint( - model_name_or_path: str, - output_dir: str, - quant_config: OneOfQuantConfig, - *, - cache_dir: Optional[str] = None, - dry_run: bool = False, -) -> None: - """Quantize HuggingFace model checkpoint to Friendli format. - - Args: - model_name_or_path (str): Hugging Face model name or local path to the checkpoint. 
- output_dir (str): Directory path to save the converted checkpoint and the attribute YAML, - and tokenizer configuration file. - quant_config (OneOfQuantConfig): Quantization configuration. - cache_dir (Optional[str], optional): Path for downloading checkpoint. Defaults to None. - dry_run (bool, optional): Check only if the checkpoint is convertible. Defaults to False. - - Raises: - InvalidConfigError: Raised when data_type is not supported. - NotFoundError: Raised when `model_name_or_path` or `tokenizer_output_dir` is not found. - NotSupportedCheckpointError: Raised when the model architecture is not supported for quantization. - """ - model_config = get_model_pretrained_config( - model_name_or_path, output_dir, cache_dir - ) - if quant_config.quant_scale_dtype is None: - model_dtype = get_model_dtype(model_config.torch_dtype) - quant_config.quant_scale_dtype = model_dtype - logger.warn( - "quant_scale_dtype is not set. Set to %s, same as hf model dtype.", - model_dtype, - ) - hf_factory, quantizer = get_hf_quantizer_factory(model_config, quant_config) - dtype = model_config.torch_dtype - quantizer.check_config() - - if not dry_run: - logger.info( - "Start loading Hugging Face checkpoint (%s) for conversion...", - model_name_or_path, - ) - model = hf_factory.from_pretrained( - model_name_or_path, - torch_dtype=dtype, - cache_dir=cache_dir, - trust_remote_code=True, - low_cpu_mem_usage=True, - # `low_cpu_mem_usage` makes model loading faster and uses only ~1x the model size in CPU memory. - # https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel.from_pretrained.example - ) - logger.info( - "Hugging Face checkpoint (%s) is successfully loaded!", - model_name_or_path, - ) - model = quantizer.quantize(model) - model.config.update({"quantization_config": quantizer.get_quant_config()}) - model.save_pretrained(output_dir) - try: - save_tokenizer( - model_name_or_path=model_name_or_path, - cache_dir=cache_dir, - save_dir=output_dir, - ) - except TokenizerNotFoundError as exc: - logger.warn(str(exc)) - logger.info( - "Hugging Face checkpoint (%s) is successfully quantized to Friendli format!", - model_name_or_path, - ) diff --git a/friendli/modules/quantizer_v2/schema/__init__.py b/friendli/modules/quantizer_v2/schema/__init__.py deleted file mode 100644 index f5d8dd04..00000000 --- a/friendli/modules/quantizer_v2/schema/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantizer Schema.""" diff --git a/friendli/modules/quantizer_v2/schema/config.py b/friendli/modules/quantizer_v2/schema/config.py deleted file mode 100644 index 37b481c2..00000000 --- a/friendli/modules/quantizer_v2/schema/config.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Friendli Model Quantizer Config Schema.""" - -from __future__ import annotations - -from typing import Literal, Optional, Union - -from pydantic import BaseModel, Field -from typing_extensions import Annotated - -from friendli.modules.quantizer_v2.enums import ( - Int8QuantType, - ModelDataType, - QuantDatasetFormat, - QuantMode, -) - - -class CalibrationDatasetConfig(BaseModel): - """Calibration dataset config.""" - - path_or_name: str = "cnn_dailymail:3.0.0" - format: QuantDatasetFormat = QuantDatasetFormat.JSON - split: str = "validation" - lookup_column_name: str = "article" - num_samples: int = 512 - max_length: int = 512 - batch_size: int = 1 - - -class AbstractQuantConfig(BaseModel): - """Abstract quantization config.""" - - mode: QuantMode - device: str = "cuda:0" - offload: bool = True - seed: int = 42 - percentile: float = 100.0 - quant_dtype: ModelDataType = ModelDataType.INT8 - quant_scale_dtype: Optional[ModelDataType] = None - use_symmetric: bool = True - quant_group_size: int = -1 # no grouping - calibration_dataset: CalibrationDatasetConfig = Field( - default_factory=CalibrationDatasetConfig - ) - - -class Int8QuantArtgs(BaseModel): - """Int8Quant args.""" - - migration_strength: float = 0.5 - quant_type: Int8QuantType = Int8QuantType.DYNAMIC - - -class Int8QuantConfig(AbstractQuantConfig): - """Int8Quant config.""" - - mode: Literal[QuantMode.INT8] = QuantMode.INT8 - int8_args: Int8QuantArtgs = Field(default_factory=Int8QuantArtgs) - - -class DummyQuantConfig(AbstractQuantConfig): - """Dummy quant config.""" - - mode: Literal[QuantMode.DUMMY] = QuantMode.DUMMY - - -OneOfQuantConfig = Annotated[ - Union[Int8QuantConfig, DummyQuantConfig], Field(discriminator="mode") -] - - -class QuantConfig(BaseModel): - """Quantization config.""" - - config: OneOfQuantConfig diff --git a/friendli/modules/quantizer_v2/schema/data.py b/friendli/modules/quantizer_v2/schema/data.py deleted file mode 100644 index a5d8e29d..00000000 --- a/friendli/modules/quantizer_v2/schema/data.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Model Quantizer Data Schema.""" -from __future__ import annotations - -from dataclasses import dataclass -from typing import List, Optional - -import torch - -ModuleName = str - - -@dataclass -class BaseQuantResult: - """Dataclass for quantization result per layer.""" - - q_group_size: int - zero_point: Optional[torch.Tensor] - q_weight: torch.Tensor - weight_scale: torch.Tensor - - -@dataclass -class WeightOnlyQuantResult(BaseQuantResult): - """Dataclass for weight-only quantization result per layer.""" - - -@dataclass -class WeightActQuantResult(BaseQuantResult): - """Dataclass for weight-activation quantization result per layer.""" - - act_scale: torch.Tensor - q_group_size: int - - -@dataclass -class QuantInput: - """Dataclass for quantization input of each layer in transformer block. - - When you want to quantize specific layers at once, the target layers should be - included in this dataclass. For example, if the quantization scale of the q_proj, - k_proj, and v_proj layers in the self-attention layer are calculated together, - the target_names and local_names of these layers should be included in the - same QuantInput dataclass. - - Attributes: - parent_module: module contains target layers. - target_names: list of target module's full name - (ex. model.model.layers.0.self_attn.q_proj, ) - local_names: list of target module's name using when access from parent_module - (ex. 
q_proj, k_proj, v_proj ) - """ - - parent_module: torch.nn.Module - target_names: List[ModuleName] - local_names: str - - -@dataclass -class TFQuantInputs: - """Dataclass for Container of per transformer block.""" - - layer_index: int - block: torch.nn.Module - quant_inputs: List[QuantInput] diff --git a/friendli/modules/quantizer_v2/utils.py b/friendli/modules/quantizer_v2/utils.py deleted file mode 100644 index 368ba95b..00000000 --- a/friendli/modules/quantizer_v2/utils.py +++ /dev/null @@ -1,565 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -"""Friendli Quantizer Utils.""" - -from __future__ import annotations - -import os -from contextlib import contextmanager -from pathlib import Path -from typing import ( - Callable, - Dict, - Iterable, - List, - Optional, - Protocol, - Sequence, - Tuple, - Type, - Union, -) - -import datasets # type: ignore[import] -import torch -from accelerate import cpu_offload_with_hook # type: ignore -from torch.utils.data import DataLoader -from tqdm import tqdm -from transformers import ( # type: ignore - AutoConfig, - AutoTokenizer, - PretrainedConfig, - PreTrainedModel, - PreTrainedTokenizer, -) - -from friendli.errors import ( - InvalidConfigError, - NotFoundError, - QuantizationError, - TokenizerNotFoundError, -) -from friendli.logging import logger -from friendli.modules.quantizer_v2.enums import ModelDataType -from friendli.modules.quantizer_v2.schema.config import CalibrationDatasetConfig -from friendli.modules.quantizer_v2.schema.data import ( - ModuleName, - WeightActQuantResult, - WeightOnlyQuantResult, -) - - -def get_tokenizer( - model_name_or_path: str, - *, - cache_dir: Optional[str] = None, -) -> PreTrainedTokenizer: - """Try to get tokenizer of a pretrained model.""" - try: - tokenizer = AutoTokenizer.from_pretrained( - model_name_or_path, - cache_dir=cache_dir, - trust_remote_code=True, - ) - except OSError as exc: - raise TokenizerNotFoundError(str(exc)) from exc - - if not tokenizer.is_fast: - raise TokenizerNotFoundError( - "This model does not support Friendli-compatible tokenizer" - ) - - if tokenizer.pad_token != "": - tokenizer.pad_token = tokenizer.eos_token - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - - return tokenizer - - -def save_tokenizer( - model_name_or_path: str, - *, - cache_dir: Optional[str] = None, - save_dir: str, -) -> Tuple[str, ...]: - """Try to save `tokenizer.json` of a pretrained model.""" - if not os.path.isdir(save_dir): - raise NotFoundError(f"Directory '{save_dir}' is not found.") - - tokenizer = get_tokenizer(model_name_or_path, cache_dir=cache_dir) - saved_file_paths = tokenizer.save_pretrained(save_directory=save_dir) - tokenizer_json_path = None - for path in saved_file_paths: - if "tokenizer.json" == os.path.basename(path): - tokenizer_json_path = path - break - - if tokenizer_json_path is None: - raise TokenizerNotFoundError( - "This model has the Friendli-compatible tokenizer implementation, but " - "'tokenizer.json' file is not found." 
- ) - return saved_file_paths - - -def get_model_pretrained_config( - model_name_or_path: str, model_output_path: str, cache_dir: Optional[str] = None -) -> PretrainedConfig: - """Get HuggingFace model configs.""" - try: - config = AutoConfig.from_pretrained( - model_name_or_path, cache_dir=cache_dir, trust_remote_code=True - ) - except OSError as exc: # from AutoConfig.from_pretrained() - config_dir = Path(model_name_or_path) - model_output_dir = Path(model_output_path).parent - if config_dir.exists() and model_output_dir.absolute() == config_dir.absolute(): - raise NotFoundError( - f"'output_dir' ({model_output_dir.as_posix()}) and " - f"'model_name_or_path' ({model_name_or_path}) are the same. " - "In such a case, checkpoints should be prepared in 'output_dir'." - ) from exc - raise NotFoundError(str(exc)) from exc - - return config - - -def safe_load_datasets(data_cfg: CalibrationDatasetConfig) -> datasets.Dataset: - """Load dataset from calibration dataset config.""" - data_path = data_cfg.path_or_name - data_split = data_cfg.split - - try: - if os.path.exists(data_path): - dataset = datasets.load_dataset( - data_cfg.format, - data_files=data_path, - split=data_split, - ) - else: - data_name_parts = data_path.split(":") - if len(data_name_parts) == 1: - dataset = datasets.load_dataset(data_path, split=data_split) - elif len(data_name_parts) == 2: - data_name, subset_name = data_name_parts - dataset = datasets.load_dataset( - data_name, subset_name, split=data_split - ) - else: - raise InvalidConfigError( - "Dataset name is in invalid format. " - "(valid format: '' or ':')" - ) - except ValueError as err: - raise QuantizationError(f"datasets.load_dataset failed. {str(err)}") from err - - if not isinstance(dataset, datasets.Dataset): - raise InvalidConfigError( - "This dataset format is not supported for the calibration." - ) - - return dataset - - -def build_percentile_statistics( - scale_percentile: float, - symmetric: bool = True, -) -> Tuple[Callable, Callable, Callable]: - """Builds the hooks for getting the max input and output activations of a model.""" - logger.info( - "Building percentile statistics hooks. 
scale_percentile: (%s)", - scale_percentile, - ) - - max_input_M1: Dict[str, torch.Tensor] = {} - max_input_M2: Dict[str, torch.Tensor] = {} - max_input_num: Dict[str, torch.Tensor] = {} - max_output_M1: Dict[str, torch.Tensor] = {} - max_output_M2: Dict[str, torch.Tensor] = {} - max_output_num: Dict[str, torch.Tensor] = {} - - def create_hook(name: ModuleName): - def update_stats( - max_M1: Dict[str, torch.Tensor], - max_M2: Dict[str, torch.Tensor], - max_num: Dict[str, int], - new_t: torch.Tensor, - ) -> None: - # Chan's method for computing mean and variance incrementally - new_t = new_t.detach().reshape(-1, new_t.size(-1)) - new_numel = new_t.size(0) - new_t_M1 = new_t.to(torch.float64).mean(dim=0) - if symmetric: - # it is assumed samples are always centered on zero - # in the symmetric quantization scheme - new_t_M1.zero_() - new_t_M2 = ((new_t.to(torch.float64) - new_t_M1) ** 2).sum(dim=0) - try: - pre_numel = max_num[name] - max_num[name] += new_numel - delta = new_t_M1 - max_M1[name] - max_M1[name] += delta * (new_numel / max_num[name]) - max_M2[name] += new_t_M2 + torch.pow(delta, 2) * ( - pre_numel * new_numel / max_num[name] - ) - except KeyError: - max_num[name] = new_numel - max_M1[name] = new_t_M1 - max_M2[name] = new_t_M2 - - def hook(module, in_t_tup, out_t): # pylint: disable=unused-argument - with torch.no_grad(): - in_t = in_t_tup[0] - update_stats(max_input_M1, max_input_M2, max_input_num, in_t) - update_stats(max_output_M1, max_output_M2, max_output_num, out_t) - - return hook - - def finish_input_stats(): - return { - name: torch.distributions.Normal( - loc=max_input_M1[name], - scale=torch.sqrt(max_input_M2[name] / max_input_num[name]).clip( - min=1e-7 - ), - ).icdf( - torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to( - max_input_M1[name].device - ) - ) - for name in list(max_input_M1.keys()) - } - - def finish_output_stats(): - return { - name: torch.distributions.Normal( - loc=max_output_M1[name], - scale=torch.sqrt(max_output_M2[name] / max_output_num[name]).clip( - min=1e-7 - ), - ).icdf( - torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to( - max_output_M1[name].device - ) - ) - for name in list(max_output_M1.keys()) - } - - return finish_input_stats, finish_output_stats, create_hook - - -def build_max_statistics() -> Tuple[Callable, Callable, Callable]: - """Builds the hooks for getting the max input and output activations of a model.""" - logger.info("Building max statistics hooks") - max_input_stats: Dict[str, torch.Tensor] = {} - max_output_stats: Dict[str, torch.Tensor] = {} - - def create_hook(name: ModuleName): - def hook(modules, in_t_tup, out_t): # pylint: disable=unused-argument - in_t = in_t_tup[0] - in_t = ( - in_t.detach().abs().reshape(-1, in_t.size(-1)).max(dim=0).values - ) # reduce-max only leaving the hidden dim (supposing the last dim is the hidden dim) - out_t = out_t.detach().reshape(-1, out_t.size(-1)) - out_t = out_t.abs().max(dim=0).values - try: - max_input_stats[name] = torch.maximum(max_input_stats[name], in_t) - except KeyError: - max_input_stats[name] = in_t - try: - max_output_stats[name] = torch.maximum(max_output_stats[name], out_t) - except KeyError: - max_output_stats[name] = out_t - - return hook - - def finish_input_stats(): - return max_input_stats - - def finish_output_stats(): - return max_output_stats - - return finish_input_stats, finish_output_stats, create_hook - - -@torch.no_grad() -def collect_stats( - model: PreTrainedModel, - device: str, - calib_dataloader: DataLoader, - target_classes: 
Tuple[Type[torch.nn.Module], ...], - tqdm_desc: str, - percentile: float, -) -> Tuple[Dict[ModuleName, torch.Tensor], Dict[ModuleName, torch.Tensor]]: - """Collects the maximum values of input and output activations of a specific model. - - Args: - model (torch.nn.Module): The model for which we want to collect the max statistics. - dataset (Dataset): Dataset that contains input tensors. - target_classes (Tuple[Type[torch.nn.Module], ...]): A tuple of the target classes. - - Returns: - A tuple of two dictionaries: (max_input_stats, max_output_stats), where: - max_input_stats: The maximum input activation values for each module of the model. - max_output_stats: The maximum output activation values for each module of the model. - - This function uses a forward hook to capture the maximum input and output activation values - of the specified target_classes. The max_batch_size parameter controls the size of the input - batches that are passed through the model. - - The function returns two dictionaries containing the maximum input and output activation - values for each module of the model, respectively. These dictionaries can be used to calculate - scaling factors for weight quantization and activation smoothing. - - """ - # pylint: disable=too-many-locals - max_input_stats, max_output_stats, create_hook = ( - build_percentile_statistics(percentile) - if percentile < 100.0 - else build_max_statistics() - ) - name_mods = [ - (name, module) - for name, module in model.named_modules() - if isinstance(module, target_classes) - ] - - removables = [] - for name, module in name_mods: - removables.append(module.register_forward_hook(create_hook(name))) - try: - for inputs in tqdm(calib_dataloader, desc=tqdm_desc): - model(inputs.to(device)) - finally: - for removable in removables: - removable.remove() - return max_input_stats(), max_output_stats() - - -def convert_tensor_to_quant_dtype( - param: torch.Tensor, - quant_dtype: ModelDataType, -) -> torch.Tensor: - """Convert tensor format to the given data type. - - Args: - param (torch.Tensor): The tensor to be converted. - data_type (ModelDataType): The data type of the tensor. - - Returns: - torch.Tensor: The converted tensor. 
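# Illustrative sketch (not part of the original module): the max-statistics collection that
# collect_stats() performs, reduced to its core. A forward hook on each nn.Linear records the
# per-channel absolute maximum of its input, which later serves as the activation scale for
# INT8 quantization. The tiny model and random calibration batches are assumptions for
# illustration only.
import torch

model = torch.nn.Sequential(torch.nn.Linear(16, 32), torch.nn.ReLU(), torch.nn.Linear(32, 8))
max_input_stats = {}  # module name -> per-channel running max of |input|

def make_hook(name):
    def hook(module, inputs, output):  # standard forward-hook signature
        x = inputs[0].detach().abs().reshape(-1, inputs[0].size(-1)).max(dim=0).values
        prev = max_input_stats.get(name)
        max_input_stats[name] = x if prev is None else torch.maximum(prev, x)
    return hook

handles = [
    m.register_forward_hook(make_hook(n))
    for n, m in model.named_modules()
    if isinstance(m, torch.nn.Linear)
]
with torch.no_grad():
    for _ in range(4):                  # stand-in for the calibration dataloader
        model(torch.randn(2, 16))
for h in handles:
    h.remove()
# max_input_stats now maps "0" and "2" to per-channel activation maxima.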
- - """ - assert quant_dtype in [ModelDataType.INT4, ModelDataType.INT8] - if quant_dtype is ModelDataType.INT4: - pack_num = 8 // 4 - int4_param = torch.zeros( - (param.shape[0], param.shape[1] // pack_num), - dtype=torch.uint8, - device=param.device, - ) - for col in range(int4_param.shape[1]): - for i in range(pack_num): - int4_param[:, col] |= param[:, col * pack_num + i] << (i * 4) - param = int4_param.to(torch.int8) - - elif quant_dtype is ModelDataType.INT8: - param = param.to(torch.int8) - - return param.detach().to("cpu") - - -@torch.no_grad() -def get_weight_act_quant_scales( - model: PreTrainedModel, - layer_names: List[str], - max_input_stats: Dict[ModuleName, torch.Tensor], - device: str = "cpu", - quant_dtype: ModelDataType = ModelDataType.INT8, - quant_scale_dtype: ModelDataType = ModelDataType.FP32, -) -> List[WeightActQuantResult]: - """Get the quantization scales and int8 weight for a specific layer.""" - input_max = torch.concat([max_input_stats[name] for name in layer_names]) - target_weights = [model.get_submodule(name).weight for name in layer_names] - target_weight = torch.concat(target_weights) - - max_val = 2 ** (8 - 1) - 1 - min_val = -(2 ** (8 - 1)) - - act_scale = float(input_max.detach().abs().max().item()) / float(max_val) - weight_scale = float(target_weight.detach().abs().max().item()) / float(max_val) - - q_weights = [ - ( - convert_tensor_to_quant_dtype( - (weight.detach().float() / weight_scale).clip(min_val, max_val), - quant_dtype, - ).to(device) - ) - for weight in target_weights - ] - quant_scale_torch_dtype = get_torch_data_type(quant_scale_dtype) - return [ - WeightActQuantResult( - act_scale=torch.tensor(act_scale, dtype=quant_scale_torch_dtype), - weight_scale=torch.tensor(weight_scale, dtype=quant_scale_torch_dtype), - q_weight=q_weight, - q_group_size=-1, - zero_point=None, - ) - for _, q_weight in zip(layer_names, q_weights) - ] - - -def get_weight_only_quant_scales( - model: PreTrainedModel, - layer_names: List[str], - quant_dtype: ModelDataType, - quant_scale_dtype: ModelDataType, - q_group_size: int = -1, - use_symmetric: bool = True, - device: Union[str, torch.device] = "cpu", -) -> List[WeightOnlyQuantResult]: - """Return the quantization scales of weight for a specific layer.""" - # pylint: disable=too-many-locals - assert quant_dtype in [ModelDataType.INT4, ModelDataType.INT8] - q_bit = 4 if quant_dtype == ModelDataType.INT4 else 8 - target_weights = [model.get_submodule(name).weight for name in layer_names] - org_w_shape = target_weights[0].shape # [OutDim, InDim] - w = torch.concat(target_weights) - - if q_group_size != -1: - w = w.reshape(-1, q_group_size) # [OutDim x num_groups, group_size] - - if use_symmetric: - max_val = w.abs().amax(dim=1, keepdim=True) - max_int = 2 ** (q_bit - 1) - 1 - min_int = -(2 ** (q_bit - 1)) - scales = (max_val / float(max_int)).clamp(min=1e-5) - zeros = torch.zeros_like(max_val) - else: - max_val = w.amax(dim=1, keepdim=True) - min_val = w.amin(dim=1, keepdim=True) - max_int = 2**q_bit - 1 - min_int = 0 - - scales = (max_val - min_val).clamp(min=1e-5) / max_int - zeros = (-torch.round(min_val / scales)).clamp_(min_int, max_int) - - q_weights = [ - convert_tensor_to_quant_dtype( - torch.clamp(torch.round(w / scales) + zeros, min_int, max_int) - .reshape(org_w_shape) - .detach(), - quant_dtype, - ).to(device) - for w in target_weights - ] - quant_scale_torch_dtype = get_torch_data_type(quant_scale_dtype) - scales = ( - scales.view(org_w_shape[0], -1).detach().transpose(0, 1).to(device) - ) # [num_groups, 
OutDim] - zeros = ( - zeros.view(org_w_shape[0], -1).detach().transpose(0, 1).to(device) - ) # [num_groups, OutDim] - - if q_group_size == -1: - scales = scales.squeeze(0) - zeros = zeros.squeeze(0) - - return [ - WeightOnlyQuantResult( - zero_point=None if use_symmetric else zeros.to(quant_scale_torch_dtype), - q_group_size=q_group_size, - weight_scale=scales.to(quant_scale_torch_dtype), - q_weight=q_weight, - ) - for q_weight in q_weights - ] - - -def get_model_dtype(torch_dtype: torch.dtype) -> ModelDataType: - """Get torch data type from Enum.""" - if torch_dtype == torch.float16: - return ModelDataType.FP16 - if torch_dtype == torch.float32: - return ModelDataType.FP32 - if torch_dtype == torch.bfloat16: - return ModelDataType.BF16 - raise QuantizationError(f"{torch_dtype} is not valid dtype for hf model dtype.") - - -def get_torch_data_type(data_type: str) -> torch.dtype: - """Get torch data type from Enum.""" - if data_type == ModelDataType.FP16: - return torch.float16 - if data_type == ModelDataType.FP32: - return torch.float32 - if data_type == ModelDataType.BF16: - return torch.bfloat16 - raise QuantizationError( - f"Can't not converted original param to {data_type}. Only FP16, FP32, BF16 are supported." - ) - - -def send_model_to_device( - model: PreTrainedModel, - device: Union[str, torch.device], - *, - exclude: Iterable[torch.nn.Module] = (), -): - """Send the model and its submodules onto device except for modules designated by `exclude`.""" - exclude_set = set(exclude) - - @torch.no_grad() - def recurse(m: torch.nn.Module): - if m in exclude_set: - return - for name, p in list(m.named_parameters(recurse=False)): - m.register_parameter(name, torch.nn.Parameter(p.to(device))) - for name, b in list(m.named_buffers(recurse=False)): - m.register_buffer(name, b.to(device)) - - for child in m.children(): - recurse(child) - - recurse(model) - - -class RemovableOffloaderHook(Protocol): - """Hook protocol for cpu offloader.""" - - def offload(self) -> None: - """Offload the associated block onto CPU.""" - - def remove(self) -> None: - """Remove this hook.""" - - -@contextmanager -def offload_module_sequence( - blocks: Sequence[torch.nn.Module], device: Union[str, torch.device] -): - """Offload a sequence of torch modules automatically. - - In the beginning, all blocks are supposed to reside on CPU. - When i-th block is called, it is loaded onto `device` on the fly. - And at the same time, it offloads (i-1)-th block back to CPU. 
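# Illustrative sketch (not part of the original module): the symmetric, per-group weight-only
# quantization that get_weight_only_quant_scales() computes. For each group of `q_group_size`
# input channels, scale = max|w| / (2**(b-1) - 1) and q = clamp(round(w / scale)) to the signed
# b-bit range. The 4-bit width and toy weight shape are assumptions for illustration only.
import torch

q_bit, q_group_size = 4, 64
w = torch.randn(256, 1024)                      # [OutDim, InDim]
grouped = w.reshape(-1, q_group_size)           # [OutDim * num_groups, group_size]

max_int = 2 ** (q_bit - 1) - 1                  # 7 for INT4
min_int = -(2 ** (q_bit - 1))                   # -8 for INT4
scales = grouped.abs().amax(dim=1, keepdim=True).clamp(min=1e-5) / max_int
q = torch.clamp(torch.round(grouped / scales), min_int, max_int)

dequant = (q * scales).reshape(w.shape)
print((w - dequant).abs().max())                # per-element error is roughly bounded by scale / 2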
- """ - module_hooks: List[RemovableOffloaderHook] = [] - if blocks: - prev_module_hook = None - for tf_block in blocks: - _, module_hook = cpu_offload_with_hook( - tf_block, device, prev_module_hook=prev_module_hook - ) - prev_module_hook = module_hook - module_hooks.append(module_hook) - try: - yield - finally: - for hook in module_hooks: - hook.offload() - for hook in module_hooks: - hook.remove() diff --git a/friendli/sdk/api/base.py b/friendli/sdk/api/base.py index 2e6aad86..8c803c50 100644 --- a/friendli/sdk/api/base.py +++ b/friendli/sdk/api/base.py @@ -24,7 +24,12 @@ from friendli.auth import get_auth_header from friendli.errors import APIError -from friendli.utils.request import DEFAULT_REQ_TIMEOUT, transform_request_data +from friendli.utils.request import ( + DEFAULT_CONNECTION_LIMITS, + DEFAULT_REQ_TIMEOUT, + DEFAULT_TIMEOUT, + transform_request_data, +) _GenerationLine = TypeVar("_GenerationLine", bound=BaseModel) @@ -93,10 +98,26 @@ async def __anext__(self) -> _GenerationLine: _ProtoMsgType = TypeVar("_ProtoMsgType", bound=Type[pb_message.Message]) +class _DefaultHttpxClient(httpx.Client): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + +class _DefaultAsyncHttpxClient(httpx.AsyncClient): + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + super().__init__(**kwargs) + + class BaseAPI(ABC, Generic[_HttpxClient, _ProtoMsgType]): """Base API interface.""" - _client: _HttpxClient + _http_client: _HttpxClient def __init__( self, @@ -133,13 +154,12 @@ def _build_http_request( self, data: dict[str, Any], model: Optional[str] = None ) -> httpx.Request: """Build request.""" - return self._client.build_request( + return self._http_client.build_request( method=self._method, url=self._build_http_url(), content=self._build_content(data, model), files=self._build_files(data), headers=self._get_headers(), - timeout=DEFAULT_REQ_TIMEOUT, ) def _build_http_url(self) -> httpx.URL: @@ -213,7 +233,7 @@ def __init__( endpoint_id: Optional[str] = None, use_protobuf: bool = False, use_grpc: bool = False, - client: Optional[httpx.Client] = None, + http_client: Optional[httpx.Client] = None, grpc_channel: Optional[grpc.Channel] = None, ) -> None: """Initializes ServingAPI.""" @@ -224,7 +244,7 @@ def __init__( ) self._use_grpc = use_grpc - self._client = client or httpx.Client() + self._http_client = http_client or _DefaultHttpxClient() self._grpc_channel = grpc_channel self._grpc_stub = None @@ -240,7 +260,7 @@ def close(self) -> None: """Close the gRPC channel and HTTP client.""" if self._grpc_channel: self._grpc_channel.close() - self._client.close() + self._http_client.close() def _get_grpc_stub(self, channel: grpc.Channel) -> Any: raise NotImplementedError # pragma: no cover @@ -274,7 +294,7 @@ def _request( return grpc_response http_request = self._build_http_request(data=data, model=model) - http_response = self._client.send(request=http_request, stream=stream) + http_response = self._http_client.send(request=http_request, stream=stream) self._check_http_error(http_response) return http_response @@ -302,7 +322,7 @@ def __init__( endpoint_id: Optional[str] = None, use_protobuf: bool = False, use_grpc: bool = False, - client: Optional[httpx.AsyncClient] = None, + http_client: 
Optional[httpx.AsyncClient] = None, grpc_channel: Optional[grpc.aio.Channel] = None, ) -> None: """Initializes AsyncServingAPI.""" @@ -311,7 +331,7 @@ def __init__( ) self._use_grpc = use_grpc - self._client = client or httpx.AsyncClient() + self._http_client = http_client or _DefaultAsyncHttpxClient() self._grpc_channel = grpc_channel self._grpc_stub = None @@ -327,7 +347,7 @@ async def close(self) -> None: """Close the gRPC channel and HTTP client.""" if self._grpc_channel: await self._grpc_channel.close(grace=None) - await self._client.aclose() + await self._http_client.aclose() def _get_grpc_stub(self, channel: grpc.aio.Channel) -> Any: raise NotImplementedError # pragma: no cover @@ -363,7 +383,9 @@ async def _request( return grpc_response http_request = self._build_http_request(data=data, model=model) - http_response = await self._client.send(request=http_request, stream=stream) + http_response = await self._http_client.send( + request=http_request, stream=stream + ) await self._check_http_error(http_response) return http_response diff --git a/friendli/sdk/api/chat/chat.py b/friendli/sdk/api/chat/chat.py index 9741794d..7351ac01 100644 --- a/friendli/sdk/api/chat/chat.py +++ b/friendli/sdk/api/chat/chat.py @@ -24,7 +24,7 @@ def __init__( endpoint_id: Optional[str] = None, use_protobuf: bool = False, use_grpc: bool = False, - client: Optional[httpx.Client] = None, + http_client: Optional[httpx.Client] = None, grpc_channel: Optional[grpc.Channel] = None, ) -> None: """Initializes Chat.""" @@ -33,7 +33,7 @@ def __init__( endpoint_id=endpoint_id, use_protobuf=use_protobuf, use_grpc=use_grpc, - client=client, + http_client=http_client, grpc_channel=grpc_channel, ) @@ -53,7 +53,7 @@ def __init__( endpoint_id: Optional[str] = None, use_protobuf: bool = False, use_grpc: bool = False, - client: Optional[httpx.AsyncClient] = None, + http_client: Optional[httpx.AsyncClient] = None, grpc_channel: Optional[grpc.aio.Channel] = None, ) -> None: """Initializes AsyncChat.""" @@ -62,7 +62,7 @@ def __init__( endpoint_id=endpoint_id, use_protobuf=use_protobuf, use_grpc=use_grpc, - client=client, + http_client=http_client, grpc_channel=grpc_channel, ) diff --git a/friendli/sdk/api/images/images.py b/friendli/sdk/api/images/images.py index 1901c501..fb58e658 100644 --- a/friendli/sdk/api/images/images.py +++ b/friendli/sdk/api/images/images.py @@ -20,11 +20,11 @@ def __init__( self, base_url: Optional[str] = None, endpoint_id: Optional[str] = None, - client: Optional[httpx.Client] = None, + http_client: Optional[httpx.Client] = None, ) -> None: """Initialize Images.""" self.text_to_image = TextToImage( - base_url=base_url, endpoint_id=endpoint_id, client=client + base_url=base_url, endpoint_id=endpoint_id, http_client=http_client ) def close(self) -> None: @@ -41,11 +41,11 @@ def __init__( self, base_url: Optional[str] = None, endpoint_id: Optional[str] = None, - client: Optional[httpx.AsyncClient] = None, + http_client: Optional[httpx.AsyncClient] = None, ) -> None: """Initialize Images.""" self.text_to_image = AsyncTextToImage( - base_url=base_url, endpoint_id=endpoint_id, client=client + base_url=base_url, endpoint_id=endpoint_id, http_client=http_client ) async def close(self) -> None: diff --git a/friendli/sdk/client.py b/friendli/sdk/client.py index 6ef97e53..6930641c 100644 --- a/friendli/sdk/client.py +++ b/friendli/sdk/client.py @@ -8,6 +8,7 @@ import grpc import grpc.aio +import httpx import friendli from friendli.client.graphql.endpoint import EndpointGqlClient @@ -34,6 +35,7 @@ def __init__( 
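# Illustrative sketch (not part of this patch): with the `http_client` parameter introduced
# above, callers can inject their own httpx.Client in place of the default one (600 s request
# timeout, 5 s connect timeout, pooled connections, redirects followed). The timeout and limit
# values below are caller choices, not SDK defaults, and API credentials are assumed to be
# configured elsewhere (e.g., via the environment).
import httpx

from friendli.sdk.client import Friendli

client = Friendli(
    http_client=httpx.Client(
        timeout=httpx.Timeout(timeout=60.0, connect=3.0),
        limits=httpx.Limits(max_connections=50, max_keepalive_connections=10),
    ),
)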
base_url: Optional[str] = None, use_protobuf: bool = False, use_grpc: bool = False, + http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None, grpc_channel: Optional[Union[grpc.Channel, grpc.aio.Channel]] = None, ): """Initializes FriendliClientBase.""" @@ -52,6 +54,8 @@ def __init__( raise ValueError( "One of `base_url` and `grpc_channel` should be set when `use_grpc=True`." ) + if http_client is not None: + raise ValueError("You cannot use HTTP client when `use_grpc=True`.") else: if grpc_channel is not None: raise ValueError( @@ -80,6 +84,7 @@ def __init__( base_url: Optional[str] = None, use_protobuf: bool = False, use_grpc: bool = False, + http_client: Optional[httpx.Client] = None, grpc_channel: Optional[grpc.Channel] = None, ): """Initializes Friendli.""" @@ -91,6 +96,7 @@ def __init__( base_url=base_url, use_protobuf=use_protobuf, use_grpc=use_grpc, + http_client=http_client, grpc_channel=grpc_channel, ) @@ -99,6 +105,7 @@ def __init__( endpoint_id=self._endpoint_id, use_protobuf=use_protobuf, use_grpc=use_grpc, + http_client=http_client, grpc_channel=grpc_channel, ) self.chat = Chat( @@ -106,9 +113,14 @@ def __init__( endpoint_id=self._endpoint_id, use_protobuf=use_protobuf, use_grpc=use_grpc, + http_client=http_client, grpc_channel=grpc_channel, ) - self.images = Images(base_url=self._base_url, endpoint_id=self._endpoint_id) + self.images = Images( + base_url=self._base_url, + endpoint_id=self._endpoint_id, + http_client=http_client, + ) endpoint_client = EndpointGqlClient() model_client = ModelGqlClient() @@ -147,6 +159,7 @@ def __init__( base_url: Optional[str] = None, use_protobuf: bool = False, use_grpc: bool = False, + http_client: Optional[httpx.AsyncClient] = None, grpc_channel: Optional[grpc.aio.Channel] = None, ): """Initializes AsyncFriendli.""" @@ -158,6 +171,7 @@ def __init__( base_url=base_url, use_protobuf=use_protobuf, use_grpc=use_grpc, + http_client=http_client, grpc_channel=grpc_channel, ) @@ -166,6 +180,7 @@ def __init__( endpoint_id=self._endpoint_id, use_protobuf=use_protobuf, use_grpc=use_grpc, + http_client=http_client, grpc_channel=grpc_channel, ) self.chat = AsyncChat( @@ -173,10 +188,13 @@ def __init__( endpoint_id=self._endpoint_id, use_protobuf=use_protobuf, use_grpc=use_grpc, + http_client=http_client, grpc_channel=grpc_channel, ) self.images = AsyncImages( - base_url=self._base_url, endpoint_id=self._endpoint_id + base_url=self._base_url, + endpoint_id=self._endpoint_id, + http_client=http_client, ) async def __aenter__(self) -> AsyncFriendli: diff --git a/friendli/utils/request.py b/friendli/utils/request.py index 49157af5..9fb9e6bf 100644 --- a/friendli/utils/request.py +++ b/friendli/utils/request.py @@ -6,14 +6,19 @@ from typing import Any +import httpx import pydantic from requests.exceptions import HTTPError from friendli.utils.compat import model_dump from friendli.utils.url import discuss_url -DEFAULT_REQ_TIMEOUT = 30 +DEFAULT_REQ_TIMEOUT = 600.0 MAX_RETRIES = 3 +DEFAULT_TIMEOUT = httpx.Timeout(timeout=DEFAULT_REQ_TIMEOUT, connect=5.0) +DEFAULT_CONNECTION_LIMITS = httpx.Limits( + max_connections=1000, max_keepalive_connections=100 +) def decode_http_err(exc: HTTPError) -> str: diff --git a/friendli/utils/validate.py b/friendli/utils/validate.py index 8557d666..0e395266 100644 --- a/friendli/utils/validate.py +++ b/friendli/utils/validate.py @@ -6,7 +6,6 @@ from datetime import datetime from enum import Enum -from importlib.util import find_spec from typing import Any, Dict, Optional, Type import typer @@ -81,16 +80,3 @@ def 
validate_enums(val: Any, enum_cls: Type[Enum]) -> Any: raise InvalidConfigError( f"Invalid value. Please provide one of {supported_values}" ) from exc - - -def validate_convert_imports() -> None: - """Validate the import modules for checkpoint conversion.""" - if find_spec("torch") is None: - raise ModuleNotFoundError( - "To convert the checkpoint, you must install 'torch'." - ) - if find_spec("transformers") is None or find_spec("accelerate") is None: - raise ModuleNotFoundError( - "To convert the checkpoint," - " your must install the package with 'pip install \"friendli-client[mllib]\"'" - ) diff --git a/friendli/utils/version.py b/friendli/utils/version.py index 55cf390b..ba7e9574 100644 --- a/friendli/utils/version.py +++ b/friendli/utils/version.py @@ -48,8 +48,3 @@ def check_dependencies_installed(deps: List[str]) -> bool: return False return True - - -def check_extras_installed() -> bool: - """Check extra package dependencies are installed.""" - return check_dependencies_installed(["torch", "transformers"]) diff --git a/poetry.lock b/poetry.lock index 763e0739..b2a31c21 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,142 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. - -[[package]] -name = "accelerate" -version = "0.21.0" -description = "Accelerate" -optional = true -python-versions = ">=3.8.0" -files = [ - {file = "accelerate-0.21.0-py3-none-any.whl", hash = "sha256:e2609d37f2c6a56e36a0612feae6ff6d9daac9759f4899432b86b1dc97024ebb"}, - {file = "accelerate-0.21.0.tar.gz", hash = "sha256:e2959a0bf74d97c0b3c0e036ed96065142a060242281d27970d4c4e34f11ca59"}, -] - -[package.dependencies] -numpy = ">=1.17" -packaging = ">=20.0" -psutil = "*" -pyyaml = "*" -torch = ">=1.10.0" - -[package.extras] -dev = ["black (>=23.1,<24.0)", "datasets", "deepspeed", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.0.241)", "scikit-learn", "scipy", "tqdm", "transformers", "urllib3 (<2.0.0)"] -quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.0.241)", "urllib3 (<2.0.0)"] -rich = ["rich"] -sagemaker = ["sagemaker"] -test-dev = ["datasets", "deepspeed", "evaluate", "scikit-learn", "scipy", "tqdm", "transformers"] -test-prod = ["parameterized", "pytest", "pytest-subtests", "pytest-xdist"] -test-trackers = ["comet-ml", "tensorboard", "wandb"] -testing = ["datasets", "deepspeed", "evaluate", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "tqdm", "transformers"] - -[[package]] -name = "aiohttp" -version = "3.9.3" -description = "Async http client/server framework (asyncio)" -optional = true -python-versions = ">=3.8" -files = [ - {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:939677b61f9d72a4fa2a042a5eee2a99a24001a67c13da113b2e30396567db54"}, - {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f5cd333fcf7590a18334c90f8c9147c837a6ec8a178e88d90a9b96ea03194cc"}, - {file = "aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82e6aa28dd46374f72093eda8bcd142f7771ee1eb9d1e223ff0fa7177a96b4a5"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f56455b0c2c7cc3b0c584815264461d07b177f903a04481dfc33e08a89f0c26b"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bca77a198bb6e69795ef2f09a5f4c12758487f83f33d63acde5f0d4919815768"}, - {file = 
"aiohttp-3.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e083c285857b78ee21a96ba1eb1b5339733c3563f72980728ca2b08b53826ca5"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab40e6251c3873d86ea9b30a1ac6d7478c09277b32e14745d0d3c6e76e3c7e29"}, - {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df822ee7feaaeffb99c1a9e5e608800bd8eda6e5f18f5cfb0dc7eeb2eaa6bbec"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:acef0899fea7492145d2bbaaaec7b345c87753168589cc7faf0afec9afe9b747"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cd73265a9e5ea618014802ab01babf1940cecb90c9762d8b9e7d2cc1e1969ec6"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a78ed8a53a1221393d9637c01870248a6f4ea5b214a59a92a36f18151739452c"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:6b0e029353361f1746bac2e4cc19b32f972ec03f0f943b390c4ab3371840aabf"}, - {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7cf5c9458e1e90e3c390c2639f1017a0379a99a94fdfad3a1fd966a2874bba52"}, - {file = "aiohttp-3.9.3-cp310-cp310-win32.whl", hash = "sha256:3e59c23c52765951b69ec45ddbbc9403a8761ee6f57253250c6e1536cacc758b"}, - {file = "aiohttp-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:055ce4f74b82551678291473f66dc9fb9048a50d8324278751926ff0ae7715e5"}, - {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6b88f9386ff1ad91ace19d2a1c0225896e28815ee09fc6a8932fded8cda97c3d"}, - {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c46956ed82961e31557b6857a5ca153c67e5476972e5f7190015018760938da2"}, - {file = "aiohttp-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07b837ef0d2f252f96009e9b8435ec1fef68ef8b1461933253d318748ec1acdc"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad46e6f620574b3b4801c68255492e0159d1712271cc99d8bdf35f2043ec266"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ed3e046ea7b14938112ccd53d91c1539af3e6679b222f9469981e3dac7ba1ce"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:039df344b45ae0b34ac885ab5b53940b174530d4dd8a14ed8b0e2155b9dddccb"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7943c414d3a8d9235f5f15c22ace69787c140c80b718dcd57caaade95f7cd93b"}, - {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84871a243359bb42c12728f04d181a389718710129b36b6aad0fc4655a7647d4"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5eafe2c065df5401ba06821b9a054d9cb2848867f3c59801b5d07a0be3a380ae"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9d3c9b50f19704552f23b4eaea1fc082fdd82c63429a6506446cbd8737823da3"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:f033d80bc6283092613882dfe40419c6a6a1527e04fc69350e87a9df02bbc283"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:2c895a656dd7e061b2fd6bb77d971cc38f2afc277229ce7dd3552de8313a483e"}, - {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:1f5a71d25cd8106eab05f8704cd9167b6e5187bcdf8f090a66c6d88b634802b4"}, - {file = "aiohttp-3.9.3-cp311-cp311-win32.whl", hash = "sha256:50fca156d718f8ced687a373f9e140c1bb765ca16e3d6f4fe116e3df7c05b2c5"}, - {file = "aiohttp-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:5fe9ce6c09668063b8447f85d43b8d1c4e5d3d7e92c63173e6180b2ac5d46dd8"}, - {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:38a19bc3b686ad55804ae931012f78f7a534cce165d089a2059f658f6c91fa60"}, - {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:770d015888c2a598b377bd2f663adfd947d78c0124cfe7b959e1ef39f5b13869"}, - {file = "aiohttp-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee43080e75fc92bf36219926c8e6de497f9b247301bbf88c5c7593d931426679"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52df73f14ed99cee84865b95a3d9e044f226320a87af208f068ecc33e0c35b96"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc9b311743a78043b26ffaeeb9715dc360335e5517832f5a8e339f8a43581e4d"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b955ed993491f1a5da7f92e98d5dad3c1e14dc175f74517c4e610b1f2456fb11"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504b6981675ace64c28bf4a05a508af5cde526e36492c98916127f5a02354d53"}, - {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fe5571784af92b6bc2fda8d1925cccdf24642d49546d3144948a6a1ed58ca5"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ba39e9c8627edc56544c8628cc180d88605df3892beeb2b94c9bc857774848ca"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e5e46b578c0e9db71d04c4b506a2121c0cb371dd89af17a0586ff6769d4c58c1"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:938a9653e1e0c592053f815f7028e41a3062e902095e5a7dc84617c87267ebd5"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:c3452ea726c76e92f3b9fae4b34a151981a9ec0a4847a627c43d71a15ac32aa6"}, - {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff30218887e62209942f91ac1be902cc80cddb86bf00fbc6783b7a43b2bea26f"}, - {file = "aiohttp-3.9.3-cp312-cp312-win32.whl", hash = "sha256:38f307b41e0bea3294a9a2a87833191e4bcf89bb0365e83a8be3a58b31fb7f38"}, - {file = "aiohttp-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:b791a3143681a520c0a17e26ae7465f1b6f99461a28019d1a2f425236e6eedb5"}, - {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ed621426d961df79aa3b963ac7af0d40392956ffa9be022024cd16297b30c8c"}, - {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f46acd6a194287b7e41e87957bfe2ad1ad88318d447caf5b090012f2c5bb528"}, - {file = "aiohttp-3.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:feeb18a801aacb098220e2c3eea59a512362eb408d4afd0c242044c33ad6d542"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f734e38fd8666f53da904c52a23ce517f1b07722118d750405af7e4123933511"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b40670ec7e2156d8e57f70aec34a7216407848dfe6c693ef131ddf6e76feb672"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:fdd215b7b7fd4a53994f238d0f46b7ba4ac4c0adb12452beee724ddd0743ae5d"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:017a21b0df49039c8f46ca0971b3a7fdc1f56741ab1240cb90ca408049766168"}, - {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99abf0bba688259a496f966211c49a514e65afa9b3073a1fcee08856e04425b"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:648056db9a9fa565d3fa851880f99f45e3f9a771dd3ff3bb0c048ea83fb28194"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8aacb477dc26797ee089721536a292a664846489c49d3ef9725f992449eda5a8"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:522a11c934ea660ff8953eda090dcd2154d367dec1ae3c540aff9f8a5c109ab4"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5bce0dc147ca85caa5d33debc4f4d65e8e8b5c97c7f9f660f215fa74fc49a321"}, - {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b4af9f25b49a7be47c0972139e59ec0e8285c371049df1a63b6ca81fdd216a2"}, - {file = "aiohttp-3.9.3-cp38-cp38-win32.whl", hash = "sha256:298abd678033b8571995650ccee753d9458dfa0377be4dba91e4491da3f2be63"}, - {file = "aiohttp-3.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:69361bfdca5468c0488d7017b9b1e5ce769d40b46a9f4a2eed26b78619e9396c"}, - {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0fa43c32d1643f518491d9d3a730f85f5bbaedcbd7fbcae27435bb8b7a061b29"}, - {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:835a55b7ca49468aaaac0b217092dfdff370e6c215c9224c52f30daaa735c1c1"}, - {file = "aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06a9b2c8837d9a94fae16c6223acc14b4dfdff216ab9b7202e07a9a09541168f"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abf151955990d23f84205286938796c55ff11bbfb4ccfada8c9c83ae6b3c89a3"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59c26c95975f26e662ca78fdf543d4eeaef70e533a672b4113dd888bd2423caa"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f95511dd5d0e05fd9728bac4096319f80615aaef4acbecb35a990afebe953b0e"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595f105710293e76b9dc09f52e0dd896bd064a79346234b521f6b968ffdd8e58"}, - {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c8b816c2b5af5c8a436df44ca08258fc1a13b449393a91484225fcb7545533"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f1088fa100bf46e7b398ffd9904f4808a0612e1d966b4aa43baa535d1b6341eb"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f59dfe57bb1ec82ac0698ebfcdb7bcd0e99c255bd637ff613760d5f33e7c81b3"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:361a1026c9dd4aba0109e4040e2aecf9884f5cfe1b1b1bd3d09419c205e2e53d"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:363afe77cfcbe3a36353d8ea133e904b108feea505aa4792dad6585a8192c55a"}, - {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e2c45c208c62e955e8256949eb225bd8b66a4c9b6865729a786f2aa79b72e9d"}, - {file = "aiohttp-3.9.3-cp39-cp39-win32.whl", hash = 
"sha256:f7217af2e14da0856e082e96ff637f14ae45c10a5714b63c77f26d8884cf1051"}, - {file = "aiohttp-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:27468897f628c627230dba07ec65dc8d0db566923c48f29e084ce382119802bc"}, - {file = "aiohttp-3.9.3.tar.gz", hash = "sha256:90842933e5d1ff760fae6caca4b2b3edba53ba8f4b71e95dacf2818a2aca06f7"}, -] - -[package.dependencies] -aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} -attrs = ">=17.3.0" -frozenlist = ">=1.1.1" -multidict = ">=4.5,<7.0" -yarl = ">=1.0,<2.0" - -[package.extras] -speedups = ["Brotli", "aiodns", "brotlicffi"] - -[[package]] -name = "aiosignal" -version = "1.3.1" -description = "aiosignal: a list of registered asynchronous callbacks" -optional = true -python-versions = ">=3.7" -files = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, -] - -[package.dependencies] -frozenlist = ">=1.1.0" +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -192,17 +54,6 @@ wrapt = [ {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, ] -[[package]] -name = "async-timeout" -version = "4.0.3" -description = "Timeout context manager for asyncio programs" -optional = true -python-versions = ">=3.7" -files = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] - [[package]] name = "attrs" version = "23.2.0" @@ -495,50 +346,6 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] -[[package]] -name = "datasets" -version = "2.16.0" -description = "HuggingFace community-driven open-source library of datasets" -optional = true -python-versions = ">=3.8.0" -files = [ - {file = "datasets-2.16.0-py3-none-any.whl", hash = "sha256:301cc39b3d81cd751100b79c85f8ae8626c17b0b113819ba2831c204d90b43f2"}, - {file = "datasets-2.16.0.tar.gz", hash = "sha256:91b06f7a8f0329179e7d603004102a6cc7a424a2f599315297a061caa1f8fa64"}, -] - -[package.dependencies] -aiohttp = "*" -dill = ">=0.3.0,<0.3.8" -filelock = "*" -fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]} -huggingface-hub = ">=0.19.4" -multiprocess = "*" -numpy = ">=1.17" -packaging = "*" -pandas = "*" -pyarrow = ">=8.0.0" -pyarrow-hotfix = "*" -pyyaml = ">=5.1" -requests = ">=2.19.0" -tqdm = ">=4.62.1" -xxhash = "*" - -[package.extras] -apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"] -audio = ["librosa", "soundfile (>=0.12.1)"] -benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.1.5)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] -docs = 
["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"] -jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] -metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] -quality = ["ruff (>=0.1.5)"] -s3 = ["s3fs"] -tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"] -tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] -torch = ["torch"] -vision = ["Pillow (>=6.2.1)"] - [[package]] name = "dill" version = "0.3.7" @@ -573,17 +380,6 @@ idna = ["idna (>=2.1)"] trio = ["trio (>=0.14)"] wmi = ["wmi (>=1.5.1)"] -[[package]] -name = "einops" -version = "0.6.1" -description = "A new flavour of deep learning operations" -optional = true -python-versions = ">=3.7" -files = [ - {file = "einops-0.6.1-py3-none-any.whl", hash = "sha256:99149e46cc808956b174932fe563d920db4d6e5dadb8c6ecdaa7483b7ef7cfc3"}, - {file = "einops-0.6.1.tar.gz", hash = "sha256:f95f8d00f4ded90dbc4b19b6f98b177332614b0357dde66997f3ae5d474dc8c8"}, -] - [[package]] name = "email-validator" version = "2.1.0.post1" @@ -633,147 +429,6 @@ typing-extensions = ">=4.8.0" [package.extras] all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] -[[package]] -name = "filelock" -version = "3.13.1" -description = "A platform independent file lock." 
-optional = true -python-versions = ">=3.8" -files = [ - {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"}, - {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"}, -] - -[package.extras] -docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] -typing = ["typing-extensions (>=4.8)"] - -[[package]] -name = "frozenlist" -version = "1.4.1" -description = "A list-like structure which implements collections.abc.MutableSequence" -optional = true -python-versions = ">=3.8" -files = [ - {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, - {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, - {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"}, - {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"}, - {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"}, - {file = 
"frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"}, - {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"}, - {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"}, - {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"}, - {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"}, - {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"}, - {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"}, - {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"}, - {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"}, - {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"}, - {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, -] - -[[package]] -name = "fsspec" -version = "2023.10.0" -description = "File-system specification" -optional = true -python-versions = ">=3.8" -files = [ - {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"}, - {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"}, -] - -[package.dependencies] -aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} -requests = {version = "*", optional = true, markers = "extra == \"http\""} - -[package.extras] -abfs = ["adlfs"] -adl = ["adlfs"] -arrow = ["pyarrow (>=1)"] -dask = ["dask", "distributed"] -devel = ["pytest", "pytest-cov"] -dropbox = ["dropbox", "dropboxdrivefs", "requests"] -full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] -fuse = ["fusepy"] -gcs = ["gcsfs"] -git = ["pygit2"] -github = ["requests"] -gs = 
["gcsfs"] -gui = ["panel"] -hdfs = ["pyarrow (>=1)"] -http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] -libarchive = ["libarchive-c"] -oci = ["ocifs"] -s3 = ["s3fs"] -sftp = ["paramiko"] -smb = ["smbprotocol"] -ssh = ["paramiko"] -tqdm = ["tqdm"] - [[package]] name = "gql" version = "3.5.0" @@ -956,43 +611,6 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] -[[package]] -name = "h5py" -version = "3.10.0" -description = "Read and write HDF5 files from Python" -optional = true -python-versions = ">=3.8" -files = [ - {file = "h5py-3.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b963fb772964fc1d1563c57e4e2e874022ce11f75ddc6df1a626f42bd49ab99f"}, - {file = "h5py-3.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:012ab448590e3c4f5a8dd0f3533255bc57f80629bf7c5054cf4c87b30085063c"}, - {file = "h5py-3.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:781a24263c1270a62cd67be59f293e62b76acfcc207afa6384961762bb88ea03"}, - {file = "h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42e6c30698b520f0295d70157c4e202a9e402406f50dc08f5a7bc416b24e52d"}, - {file = "h5py-3.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:93dd840bd675787fc0b016f7a05fc6efe37312a08849d9dd4053fd0377b1357f"}, - {file = "h5py-3.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2381e98af081b6df7f6db300cd88f88e740649d77736e4b53db522d8874bf2dc"}, - {file = "h5py-3.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:667fe23ab33d5a8a6b77970b229e14ae3bb84e4ea3382cc08567a02e1499eedd"}, - {file = "h5py-3.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90286b79abd085e4e65e07c1bd7ee65a0f15818ea107f44b175d2dfe1a4674b7"}, - {file = "h5py-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c013d2e79c00f28ffd0cc24e68665ea03ae9069e167087b2adb5727d2736a52"}, - {file = "h5py-3.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:92273ce69ae4983dadb898fd4d3bea5eb90820df953b401282ee69ad648df684"}, - {file = "h5py-3.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c97d03f87f215e7759a354460fb4b0d0f27001450b18b23e556e7856a0b21c3"}, - {file = "h5py-3.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86df4c2de68257b8539a18646ceccdcf2c1ce6b1768ada16c8dcfb489eafae20"}, - {file = "h5py-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba9ab36be991119a3ff32d0c7cbe5faf9b8d2375b5278b2aea64effbeba66039"}, - {file = "h5py-3.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c8e4fda19eb769e9a678592e67eaec3a2f069f7570c82d2da909c077aa94339"}, - {file = "h5py-3.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:492305a074327e8d2513011fa9fffeb54ecb28a04ca4c4227d7e1e9616d35641"}, - {file = "h5py-3.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9450464b458cca2c86252b624279115dcaa7260a40d3cb1594bf2b410a2bd1a3"}, - {file = "h5py-3.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd6f6d1384a9f491732cee233b99cd4bfd6e838a8815cc86722f9d2ee64032af"}, - {file = "h5py-3.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3074ec45d3dc6e178c6f96834cf8108bf4a60ccb5ab044e16909580352010a97"}, - {file = "h5py-3.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:212bb997a91e6a895ce5e2f365ba764debeaef5d2dca5c6fb7098d66607adf99"}, - {file = "h5py-3.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:5dfc65ac21fa2f630323c92453cadbe8d4f504726ec42f6a56cf80c2f90d6c52"}, - {file = "h5py-3.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d4682b94fd36ab217352be438abd44c8f357c5449b8995e63886b431d260f3d3"}, - {file = "h5py-3.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aece0e2e1ed2aab076c41802e50a0c3e5ef8816d60ece39107d68717d4559824"}, - {file = "h5py-3.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43a61b2c2ad65b1fabc28802d133eed34debcc2c8b420cb213d3d4ef4d3e2229"}, - {file = "h5py-3.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:ae2f0201c950059676455daf92700eeb57dcf5caaf71b9e1328e6e6593601770"}, - {file = "h5py-3.10.0.tar.gz", hash = "sha256:d93adc48ceeb33347eb24a634fb787efc7ae4644e6ea4ba733d099605045c049"}, -] - -[package.dependencies] -numpy = ">=1.17.3" - [[package]] name = "httpcore" version = "0.17.3" @@ -1037,40 +655,6 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] -[[package]] -name = "huggingface-hub" -version = "0.23.2" -description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -optional = true -python-versions = ">=3.8.0" -files = [ - {file = "huggingface_hub-0.23.2-py3-none-any.whl", hash = "sha256:48727a16e704d409c4bb5913613308499664f22a99743435dc3a13b23c485827"}, - {file = "huggingface_hub-0.23.2.tar.gz", hash = "sha256:f6829b62d5fdecb452a76fdbec620cba4c1573655a8d710c1df71735fd9edbd2"}, -] - -[package.dependencies] -filelock = "*" -fsspec = ">=2023.5.0" -packaging = ">=20.9" -pyyaml = ">=5.1" -requests = "*" -tqdm = ">=4.42.1" -typing-extensions = ">=3.7.4.3" - -[package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] -cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] -fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] -hf-transfer = ["hf-transfer (>=0.1.4)"] -inference = ["aiohttp", "minijinja (>=1.0)"] -quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"] -tensorflow = ["graphviz", "pydot", "tensorflow"] -tensorflow-testing = ["keras (<3.0)", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] -torch = ["safetensors", "torch"] -typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] - [[package]] name = "idna" version = "3.6" @@ -1142,23 +726,6 @@ files = [ [package.extras] colors = ["colorama (>=0.4.6)"] -[[package]] -name = "jinja2" -version = "3.1.3" -description = "A very fast 
and expressive template engine." -optional = true -python-versions = ">=3.7" -files = [ - {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, - {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - [[package]] name = "jsonschema" version = "4.21.1" @@ -1243,75 +810,6 @@ files = [ {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, ] -[[package]] -name = "markupsafe" -version = "2.1.5" -description = "Safely add untrusted strings to HTML/XML markup." -optional = true -python-versions = ">=3.7" -files = [ - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash 
= "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, -] - [[package]] name = "mccabe" version = "0.7.0" @@ -1323,23 +821,6 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -[[package]] -name = "mpmath" -version = "1.3.0" -description = "Python library for arbitrary-precision floating-point arithmetic" -optional = true -python-versions = "*" -files = [ - {file = 
"mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, - {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, -] - -[package.extras] -develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] -docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] -tests = ["pytest (>=4.6)"] - [[package]] name = "multidict" version = "6.0.5" @@ -1439,34 +920,6 @@ files = [ {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, ] -[[package]] -name = "multiprocess" -version = "0.70.15" -description = "better multiprocessing and multithreading in Python" -optional = true -python-versions = ">=3.7" -files = [ - {file = "multiprocess-0.70.15-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:aa36c7ed16f508091438687fe9baa393a7a8e206731d321e443745e743a0d4e5"}, - {file = "multiprocess-0.70.15-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:20e024018c46d0d1602024c613007ac948f9754659e3853b0aa705e83f6931d8"}, - {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:e576062981c91f0fe8a463c3d52506e598dfc51320a8dd8d78b987dfca91c5db"}, - {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e73f497e6696a0f5433ada2b3d599ae733b87a6e8b008e387c62ac9127add177"}, - {file = "multiprocess-0.70.15-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73db2e7b32dcc7f9b0f075c2ffa45c90b6729d3f1805f27e88534c8d321a1be5"}, - {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:4271647bd8a49c28ecd6eb56a7fdbd3c212c45529ad5303b40b3c65fc6928e5f"}, - {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cf981fb998d6ec3208cb14f0cf2e9e80216e834f5d51fd09ebc937c32b960902"}, - {file = "multiprocess-0.70.15-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:18f9f2c7063346d1617bd1684fdcae8d33380ae96b99427260f562e1a1228b67"}, - {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:0eac53214d664c49a34695e5824872db4006b1a465edd7459a251809c3773370"}, - {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:1a51dd34096db47fb21fa2b839e615b051d51b97af9a67afbcdaa67186b44883"}, - {file = "multiprocess-0.70.15-py310-none-any.whl", hash = "sha256:7dd58e33235e83cf09d625e55cffd7b0f0eede7ee9223cdd666a87624f60c21a"}, - {file = "multiprocess-0.70.15-py311-none-any.whl", hash = "sha256:134f89053d82c9ed3b73edd3a2531eb791e602d4f4156fc92a79259590bd9670"}, - {file = "multiprocess-0.70.15-py37-none-any.whl", hash = "sha256:f7d4a1629bccb433114c3b4885f69eccc200994323c80f6feee73b0edc9199c5"}, - {file = "multiprocess-0.70.15-py38-none-any.whl", hash = "sha256:bee9afba476c91f9ebee7beeee0601face9eff67d822e893f9a893725fbd6316"}, - {file = "multiprocess-0.70.15-py39-none-any.whl", hash = "sha256:3e0953f5d52b4c76f1c973eaf8214554d146f2be5decb48e928e55c7a2d19338"}, - {file = "multiprocess-0.70.15.tar.gz", hash = "sha256:f20eed3036c0ef477b07a4177cf7c1ba520d9a2677870a4f47fe026f0cd6787e"}, -] - -[package.dependencies] -dill = ">=0.3.7" - [[package]] name = "mypy" version = "1.8.0" @@ -1525,203 +978,6 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -[[package]] -name = "networkx" -version = "3.1" -description = "Python package for creating and manipulating graphs and 
networks" -optional = true -python-versions = ">=3.8" -files = [ - {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"}, - {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"}, -] - -[package.extras] -default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"] -developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"] -doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"] -extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"] -test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] - -[[package]] -name = "numpy" -version = "1.24.4" -description = "Fundamental package for array computing in Python" -optional = true -python-versions = ">=3.8" -files = [ - {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, - {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, - {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, - {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, - {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, - {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, - {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, - {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, - {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, - {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, - {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, - {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, - {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, - {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, - {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, - {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, - {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, - {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = 
"sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, - {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, - {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, - {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, - {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, - {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, - {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, - {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, -] - -[[package]] -name = "nvidia-cublas-cu12" -version = "12.1.3.1" -description = "CUBLAS native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, - {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.1.105" -description = "CUDA profiling tools runtime libs." 
-optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, - {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.1.105" -description = "NVRTC native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, - {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.1.105" -description = "CUDA Runtime native Libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, - {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "8.9.2.26" -description = "cuDNN runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"}, -] - -[package.dependencies] -nvidia-cublas-cu12 = "*" - -[[package]] -name = "nvidia-cufft-cu12" -version = "11.0.2.54" -description = "CUFFT native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, - {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.2.106" -description = "CURAND native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, - {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.4.5.107" -description = "CUDA solver native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, - {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, -] - -[package.dependencies] -nvidia-cublas-cu12 = "*" -nvidia-cusparse-cu12 = "*" -nvidia-nvjitlink-cu12 = "*" - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.1.0.106" -description = "CUSPARSE native runtime libraries" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = 
"sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, - {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, -] - -[package.dependencies] -nvidia-nvjitlink-cu12 = "*" - -[[package]] -name = "nvidia-nccl-cu12" -version = "2.19.3" -description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d"}, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.3.101" -description = "Nvidia JIT LTO Library" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"}, - {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux2014_aarch64.whl", hash = "sha256:211a63e7b30a9d62f1a853e19928fbb1a750e3f17a13a3d1f98ff0ced19478dd"}, - {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"}, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.1.105" -description = "NVIDIA Tools Extension" -optional = true -python-versions = ">=3" -files = [ - {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, - {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, -] - [[package]] name = "packaging" version = "23.2" @@ -1733,73 +989,6 @@ files = [ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] -[[package]] -name = "pandas" -version = "2.0.3" -description = "Powerful data structures for data analysis, time series, and statistics" -optional = true -python-versions = ">=3.8" -files = [ - {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, - {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, - {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, - {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, - {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, - {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, - {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, - {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, - {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, - {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, - {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, -] - -[package.dependencies] -numpy = [ - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata = ">=2022.1" - -[package.extras] -all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] -aws = ["s3fs (>=2021.08.0)"] -clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] -compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard 
(>=0.15.2)"] -computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2021.07.0)"] -gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] -hdf5 = ["tables (>=3.6.1)"] -html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] -mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] -spss = ["pyreadstat (>=1.1.2)"] -sql-other = ["SQLAlchemy (>=1.4.16)"] -test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.6.3)"] - [[package]] name = "pathspec" version = "0.9.0" @@ -1811,34 +1000,6 @@ files = [ {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, ] -[[package]] -name = "peft" -version = "0.6.0" -description = "Parameter-Efficient Fine-Tuning (PEFT)" -optional = true -python-versions = ">=3.8.0" -files = [ - {file = "peft-0.6.0-py3-none-any.whl", hash = "sha256:d7fb6335beb20074f70d464aa1f2bb1ddca0875126316320a2781b04364f72a6"}, - {file = "peft-0.6.0.tar.gz", hash = "sha256:6c381208f705cd38f2cc91dc2943ac4df2615680bd75d7320d010f8f2e48e65d"}, -] - -[package.dependencies] -accelerate = ">=0.21.0" -numpy = ">=1.17" -packaging = ">=20.0" -psutil = "*" -pyyaml = "*" -safetensors = "*" -torch = ">=1.13.0" -tqdm = "*" -transformers = "*" - -[package.extras] -dev = ["black (>=22.0,<23.0)", "hf-doc-builder", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"] -docs-specific = ["hf-doc-builder"] -quality = ["black (>=22.0,<23.0)", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"] -test = ["black (>=22.0,<23.0)", "datasets", "diffusers (<0.21.0)", "hf-doc-builder", "parameterized", "pytest", "pytest-cov", "pytest-xdist", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"] - [[package]] name = "pkgutil-resolve-name" version = "1.3.10" @@ -1900,93 +1061,6 @@ files = [ {file = "protobuf-5.27.1.tar.gz", hash = "sha256:df5e5b8e39b7d1c25b186ffdf9f44f40f810bbcc9d2b71d9d3156fee5a9adf15"}, ] -[[package]] -name = "psutil" -version = "5.9.8" -description = "Cross-platform lib for process and system monitoring in Python." 
-optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" -files = [ - {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, - {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"}, - {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"}, - {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"}, - {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"}, - {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"}, - {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"}, - {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"}, - {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"}, - {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"}, - {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"}, - {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"}, - {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"}, - {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"}, - {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"}, - {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"}, -] - -[package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] - -[[package]] -name = "pyarrow" -version = "15.0.0" -description = "Python library for Apache Arrow" -optional = true -python-versions = ">=3.8" -files = [ - {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"}, - {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = 
"sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"}, - {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"}, - {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"}, - {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"}, - {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"}, - {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"}, - {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"}, - {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"}, - {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"}, - {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"}, - {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"}, - {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"}, 
- {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"}, - {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"}, - {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"}, -] - -[package.dependencies] -numpy = ">=1.16.6,<2" - -[[package]] -name = "pyarrow-hotfix" -version = "0.6" -description = "" -optional = true -python-versions = ">=3.5" -files = [ - {file = "pyarrow_hotfix-0.6-py3-none-any.whl", hash = "sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178"}, - {file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"}, -] - [[package]] name = "pydantic" version = "2.6.1" @@ -2216,31 +1290,6 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -optional = true -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytz" -version = "2024.1" -description = "World timezone definitions, modern and historical" -optional = true -python-versions = "*" -files = [ - {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, - {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, -] - [[package]] name = "pyyaml" version = "6.0.1" @@ -2316,108 +1365,6 @@ files = [ attrs = ">=22.2.0" rpds-py = ">=0.7.0" -[[package]] -name = "regex" -version = "2023.12.25" -description = "Alternative regular expression module, to replace re." 
-optional = true -python-versions = ">=3.7" -files = [ - {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"}, - {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"}, - {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"}, - {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"}, - {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"}, - {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"}, - {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"}, - {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8a0ccf52bb37d1a700375a6b395bff5dd15c50acb745f7db30415bae3c2b0715"}, - {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c3c4a78615b7762740531c27cf46e2f388d8d727d0c0c739e72048beb26c8a9d"}, - {file = "regex-2023.12.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad83e7545b4ab69216cef4cc47e344d19622e28aabec61574b20257c65466d6a"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7a635871143661feccce3979e1727c4e094f2bdfd3ec4b90dfd4f16f571a87a"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d498eea3f581fbe1b34b59c697512a8baef88212f92e4c7830fcc1499f5b45a5"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43f7cd5754d02a56ae4ebb91b33461dc67be8e3e0153f593c509e21d219c5060"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51f4b32f793812714fd5307222a7f77e739b9bc566dc94a18126aba3b92b98a3"}, - {file = "regex-2023.12.25-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba99d8077424501b9616b43a2d208095746fb1284fc5ba490139651f971d39d9"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4bfc2b16e3ba8850e0e262467275dd4d62f0d045e0e9eda2bc65078c0110a11f"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8c2c19dae8a3eb0ea45a8448356ed561be843b13cbc34b840922ddf565498c1c"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:60080bb3d8617d96f0fb7e19796384cc2467447ef1c491694850ebd3670bc457"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b77e27b79448e34c2c51c09836033056a0547aa360c45eeeb67803da7b0eedaf"}, - {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:518440c991f514331f4850a63560321f833979d145d7d81186dbe2f19e27ae3d"}, - {file = "regex-2023.12.25-cp312-cp312-win32.whl", hash = "sha256:e2610e9406d3b0073636a3a2e80db05a02f0c3169b5632022b4e81c0364bcda5"}, - {file = "regex-2023.12.25-cp312-cp312-win_amd64.whl", hash = "sha256:cc37b9aeebab425f11f27e5e9e6cf580be7206c6582a64467a14dda211abc232"}, - {file = "regex-2023.12.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:da695d75ac97cb1cd725adac136d25ca687da4536154cdc2815f576e4da11c69"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d126361607b33c4eb7b36debc173bf25d7805847346dd4d99b5499e1fef52bc7"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4719bb05094d7d8563a450cf8738d2e1061420f79cfcc1fa7f0a44744c4d8f73"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dd58946bce44b53b06d94aa95560d0b243eb2fe64227cba50017a8d8b3cd3e2"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22a86d9fff2009302c440b9d799ef2fe322416d2d58fc124b926aa89365ec482"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2aae8101919e8aa05ecfe6322b278f41ce2994c4a430303c4cd163fef746e04f"}, - {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e692296c4cc2873967771345a876bcfc1c547e8dd695c6b89342488b0ea55cd8"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:263ef5cc10979837f243950637fffb06e8daed7f1ac1e39d5910fd29929e489a"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d6f7e255e5fa94642a0724e35406e6cb7001c09d476ab5fce002f652b36d0c39"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:88ad44e220e22b63b0f8f81f007e8abbb92874d8ced66f32571ef8beb0643b2b"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3a17d3ede18f9cedcbe23d2daa8a2cd6f59fe2bf082c567e43083bba3fb00347"}, - {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d15b274f9e15b1a0b7a45d2ac86d1f634d983ca40d6b886721626c47a400bf39"}, - {file = "regex-2023.12.25-cp37-cp37m-win32.whl", hash = "sha256:ed19b3a05ae0c97dd8f75a5d8f21f7723a8c33bbc555da6bbe1f96c470139d3c"}, - {file = "regex-2023.12.25-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d1047952c0b8104a1d371f88f4ab62e6275567d4458c1e26e9627ad489b445"}, - {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b43523d7bc2abd757119dbfb38af91b5735eea45537ec6ec3a5ec3f9562a1c53"}, - {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:efb2d82f33b2212898f1659fb1c2e9ac30493ac41e4d53123da374c3b5541e64"}, - {file = "regex-2023.12.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7fca9205b59c1a3d5031f7e64ed627a1074730a51c2a80e97653e3e9fa0d415"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086dd15e9435b393ae06f96ab69ab2d333f5d65cbe65ca5a3ef0ec9564dfe770"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e81469f7d01efed9b53740aedd26085f20d49da65f9c1f41e822a33992cb1590"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:34e4af5b27232f68042aa40a91c3b9bb4da0eeb31b7632e0091afc4310afe6cb"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9852b76ab558e45b20bf1893b59af64a28bd3820b0c2efc80e0a70a4a3ea51c1"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff100b203092af77d1a5a7abe085b3506b7eaaf9abf65b73b7d6905b6cb76988"}, - {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cc038b2d8b1470364b1888a98fd22d616fba2b6309c5b5f181ad4483e0017861"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:094ba386bb5c01e54e14434d4caabf6583334090865b23ef58e0424a6286d3dc"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5cd05d0f57846d8ba4b71d9c00f6f37d6b97d5e5ef8b3c3840426a475c8f70f4"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:9aa1a67bbf0f957bbe096375887b2505f5d8ae16bf04488e8b0f334c36e31360"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:98a2636994f943b871786c9e82bfe7883ecdaba2ef5df54e1450fa9869d1f756"}, - {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37f8e93a81fc5e5bd8db7e10e62dc64261bcd88f8d7e6640aaebe9bc180d9ce2"}, - {file = "regex-2023.12.25-cp38-cp38-win32.whl", hash = "sha256:d78bd484930c1da2b9679290a41cdb25cc127d783768a0369d6b449e72f88beb"}, - {file = "regex-2023.12.25-cp38-cp38-win_amd64.whl", hash = "sha256:b521dcecebc5b978b447f0f69b5b7f3840eac454862270406a39837ffae4e697"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"}, - {file = 
"regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"}, - {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"}, - {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"}, - {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"}, -] - [[package]] name = "requests" version = "2.31.0" @@ -2585,125 +1532,6 @@ files = [ {file = "rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"}, ] -[[package]] -name = "safetensors" -version = "0.4.1" -description = "" -optional = true -python-versions = ">=3.7" -files = [ - {file = "safetensors-0.4.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:cba01c6b76e01ec453933b3b3c0157c59b52881c83eaa0f7666244e71aa75fd1"}, - {file = "safetensors-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a8f6f679d97ea0135c7935c202feefbd042c149aa70ee759855e890c01c7814"}, - {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc2ce1f5ae5143a7fb72b71fa71db6a42b4f6cf912aa3acdc6b914084778e68"}, - {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2d87d993eaefe6611a9c241a8bd364a5f1ffed5771c74840363a6c4ed8d868f6"}, - {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:097e9af2efa8778cd2f0cba451784253e62fa7cc9fc73c0744d27212f7294e25"}, - {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d10a9f7bae608ccfdc009351f01dc3d8535ff57f9488a58a4c38e45bf954fe93"}, - {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:270b99885ec14abfd56c1d7f28ada81740a9220b4bae960c3de1c6fe84af9e4d"}, - {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:285b52a481e7ba93e29ad4ec5841ef2c4479ef0a6c633c4e2629e0508453577b"}, - {file = "safetensors-0.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c3c9f0ca510e0de95abd6424789dcbc879942a3a4e29b0dfa99d9427bf1da75c"}, - {file = "safetensors-0.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:88b4653059c903015284a9722f9a46838c654257173b279c8f6f46dbe80b612d"}, - {file = "safetensors-0.4.1-cp310-none-win32.whl", hash = "sha256:2fe6926110e3d425c4b684a4379b7796fdc26ad7d16922ea1696c8e6ea7e920f"}, - {file = "safetensors-0.4.1-cp310-none-win_amd64.whl", hash = "sha256:a79e16222106b2f5edbca1b8185661477d8971b659a3c814cc6f15181a9b34c8"}, - {file = "safetensors-0.4.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:d93321eea0dd7e81b283e47a1d20dee6069165cc158286316d0d06d340de8fe8"}, - {file = "safetensors-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ff8e41c8037db17de0ea2a23bc684f43eaf623be7d34906fe1ac10985b8365e"}, - {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39d36f1d88468a87c437a1bc27c502e71b6ca44c385a9117a9f9ba03a75cc9c6"}, - {file = 
"safetensors-0.4.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7ef010e9afcb4057fb6be3d0a0cfa07aac04fe97ef73fe4a23138d8522ba7c17"}, - {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b287304f2b2220d51ccb51fd857761e78bcffbeabe7b0238f8dc36f2edfd9542"}, - {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e09000b2599e1836314430f81a3884c66a5cbabdff5d9f175b5d560d4de38d78"}, - {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9c80ce0001efa16066358d2dd77993adc25f5a6c61850e4ad096a2232930bce"}, - {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:413e1f6ac248f7d1b755199a06635e70c3515493d3b41ba46063dec33aa2ebb7"}, - {file = "safetensors-0.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3ac139377cfe71ba04573f1cda66e663b7c3e95be850e9e6c2dd4b5984bd513"}, - {file = "safetensors-0.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:04157d008385bea66d12fe90844a80d4a76dc25ec5230b5bd9a630496d1b7c03"}, - {file = "safetensors-0.4.1-cp311-none-win32.whl", hash = "sha256:5f25297148ec665f0deb8bd67e9564634d8d6841041ab5393ccfe203379ea88b"}, - {file = "safetensors-0.4.1-cp311-none-win_amd64.whl", hash = "sha256:b2f8877990a72ff595507b80f4b69036a9a1986a641f8681adf3425d97d3d2a5"}, - {file = "safetensors-0.4.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:eb2c1da1cc39509d1a55620a5f4d14f8911c47a89c926a96e6f4876e864375a3"}, - {file = "safetensors-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:303d2c0415cf15a28f8d7f17379ea3c34c2b466119118a34edd9965983a1a8a6"}, - {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb4cb3e37a9b961ddd68e873b29fe9ab4a081e3703412e34aedd2b7a8e9cafd9"}, - {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae5497adc68669db2fed7cb2dad81e6a6106e79c9a132da3efdb6af1db1014fa"}, - {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b30abd0cddfe959d1daedf92edcd1b445521ebf7ddefc20860ed01486b33c90"}, - {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d784a98c492c751f228a4a894c3b8a092ff08b24e73b5568938c28b8c0e8f8df"}, - {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e57a5ab08b0ec7a7caf30d2ac79bb30c89168431aca4f8854464bb9461686925"}, - {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:edcf3121890b5f0616aa5a54683b1a5d2332037b970e507d6bb7841a3a596556"}, - {file = "safetensors-0.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fdb58dee173ef33634c3016c459d671ca12d11e6acf9db008261cbe58107e579"}, - {file = "safetensors-0.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:780dc21eb3fd32ddd0e8c904bdb0290f2454f4ac21ae71e94f9ce72db1900a5a"}, - {file = "safetensors-0.4.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:48901bd540f8a3c1791314bc5c8a170927bf7f6acddb75bf0a263d081a3637d4"}, - {file = "safetensors-0.4.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:3b0b7b2d5976fbed8a05e2bbdce5816a59e6902e9e7c7e07dc723637ed539787"}, - {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f69903ff49cb30b9227fb5d029bea276ea20d04b06803877a420c5b1b74c689"}, - {file = 
"safetensors-0.4.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0ddd050e01f3e843aa8c1c27bf68675b8a08e385d0045487af4d70418c3cb356"}, - {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a82bc2bd7a9a0e08239bdd6d7774d64121f136add93dfa344a2f1a6d7ef35fa"}, - {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ace9e66a40f98a216ad661245782483cf79cf56eb2b112650bb904b0baa9db5"}, - {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82cbb8f4d022f2e94498cbefca900698b8ded3d4f85212f47da614001ff06652"}, - {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:791edc10a3c359a2f5f52d5cddab0df8a45107d91027d86c3d44e57162e5d934"}, - {file = "safetensors-0.4.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:83c2cfbe8c6304f0891e7bb378d56f66d2148972eeb5f747cd8a2246886f0d8c"}, - {file = "safetensors-0.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:04dd14f53f5500eb4c4149674216ba1000670efbcf4b1b5c2643eb244e7882ea"}, - {file = "safetensors-0.4.1-cp37-none-win32.whl", hash = "sha256:d5b3defa74f3723a388bfde2f5d488742bc4879682bd93267c09a3bcdf8f869b"}, - {file = "safetensors-0.4.1-cp37-none-win_amd64.whl", hash = "sha256:25a043cbb59d4f75e9dd87fdf5c009dd8830105a2c57ace49b72167dd9808111"}, - {file = "safetensors-0.4.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:3f6a520af7f2717c5ecba112041f2c8af1ca6480b97bf957aba81ed9642e654c"}, - {file = "safetensors-0.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c3807ac3b16288dffebb3474b555b56fe466baa677dfc16290dcd02dca1ab228"}, - {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b58ba13a9e82b4bc3fc221914f6ef237fe6c2adb13cede3ace64d1aacf49610"}, - {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dac4bb42f8679aadc59bd91a4c5a1784a758ad49d0912995945cd674089f628e"}, - {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:911b48dc09e321a194def3a7431662ff4f03646832f3a8915bbf0f449b8a5fcb"}, - {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82571d20288c975c1b30b08deb9b1c3550f36b31191e1e81fae87669a92217d0"}, - {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da52ee0dc8ba03348ffceab767bd8230842fdf78f8a996e2a16445747143a778"}, - {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2536b11ce665834201072e9397404170f93f3be10cca9995b909f023a04501ee"}, - {file = "safetensors-0.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:998fbac99ca956c3a09fe07cc0b35fac26a521fa8865a690686d889f0ff4e4a6"}, - {file = "safetensors-0.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:845be0aafabf2a60c2d482d4e93023fecffe5e5443d801d7a7741bae9de41233"}, - {file = "safetensors-0.4.1-cp38-none-win32.whl", hash = "sha256:ce7a28bc8af685a69d7e869d09d3e180a275e3281e29cf5f1c7319e231932cc7"}, - {file = "safetensors-0.4.1-cp38-none-win_amd64.whl", hash = "sha256:e056fb9e22d118cc546107f97dc28b449d88274207dd28872bd668c86216e4f6"}, - {file = "safetensors-0.4.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:bdc0d039e44a727824639824090bd8869535f729878fa248addd3dc01db30eae"}, - {file = "safetensors-0.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:3c1b1d510c7aba71504ece87bf393ea82638df56303e371e5e2cf09d18977dd7"}, - {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bd0afd95c1e497f520e680ea01e0397c0868a3a3030e128438cf6e9e3fcd671"}, - {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f603bdd8deac6726d39f41688ed353c532dd53935234405d79e9eb53f152fbfb"}, - {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8a85e3e47e0d4eebfaf9a58b40aa94f977a56050cb5598ad5396a9ee7c087c6"}, - {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0ccb5aa0f3be2727117e5631200fbb3a5b3a2b3757545a92647d6dd8be6658f"}, - {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d784938534e255473155e4d9f276ee69eb85455b6af1292172c731409bf9adee"}, - {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a257de175c254d39ccd6a21341cd62eb7373b05c1e618a78096a56a857e0c316"}, - {file = "safetensors-0.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6fd80f7794554091836d4d613d33a7d006e2b8d6ba014d06f97cebdfda744f64"}, - {file = "safetensors-0.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:35803201d980efcf964b75a0a2aee97fe5e9ecc5f3ad676b38fafdfe98e0620d"}, - {file = "safetensors-0.4.1-cp39-none-win32.whl", hash = "sha256:7ff8a36e0396776d3ed9a106fc9a9d7c55d4439ca9a056a24bf66d343041d3e6"}, - {file = "safetensors-0.4.1-cp39-none-win_amd64.whl", hash = "sha256:bfa2e20342b81921b98edba52f8deb68843fa9c95250739a56b52ceda5ea5c61"}, - {file = "safetensors-0.4.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ae2d5a31cfb8a973a318f7c4d2cffe0bd1fe753cdf7bb41a1939d45a0a06f964"}, - {file = "safetensors-0.4.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1a45dbf03e8334d3a5dc93687d98b6dc422f5d04c7d519dac09b84a3c87dd7c6"}, - {file = "safetensors-0.4.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2297b359d91126c0f9d4fd17bae3cfa2fe3a048a6971b8db07db746ad92f850c"}, - {file = "safetensors-0.4.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bda3d98e2bcece388232cfc551ebf063b55bdb98f65ab54df397da30efc7dcc5"}, - {file = "safetensors-0.4.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8934bdfd202ebd0697040a3dff40dd77bc4c5bbf3527ede0532f5e7fb4d970f"}, - {file = "safetensors-0.4.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:42c3710cec7e5c764c7999697516370bee39067de0aa089b7e2cfb97ac8c6b20"}, - {file = "safetensors-0.4.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:53134226053e56bd56e73f7db42596e7908ed79f3c9a1016e4c1dade593ac8e5"}, - {file = "safetensors-0.4.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:257d59e40a1b367cb544122e7451243d65b33c3f34d822a347f4eea6fdf97fdf"}, - {file = "safetensors-0.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d54c2f1826e790d1eb2d2512bfd0ee443f0206b423d6f27095057c7f18a0687"}, - {file = "safetensors-0.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:645b3f1138fce6e818e79d4128afa28f0657430764cc045419c1d069ff93f732"}, - {file = "safetensors-0.4.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e9a7ffb1e551c6df51d267f5a751f042b183df22690f6feceac8d27364fd51d7"}, - {file = 
"safetensors-0.4.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:44e230fbbe120de564b64f63ef3a8e6ff02840fa02849d9c443d56252a1646d4"}, - {file = "safetensors-0.4.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9d16b3b2fcc6fca012c74bd01b5619c655194d3e3c13e4d4d0e446eefa39a463"}, - {file = "safetensors-0.4.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:5d95ea4d8b32233910734a904123bdd3979c137c461b905a5ed32511defc075f"}, - {file = "safetensors-0.4.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:dab431699b5d45e0ca043bc580651ce9583dda594e62e245b7497adb32e99809"}, - {file = "safetensors-0.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16d8bbb7344e39cb9d4762e85c21df94ebeb03edac923dd94bb9ed8c10eac070"}, - {file = "safetensors-0.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1faf5111c66a6ba91f85dff2e36edaaf36e6966172703159daeef330de4ddc7b"}, - {file = "safetensors-0.4.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:660ca1d8bff6c7bc7c6b30b9b32df74ef3ab668f5df42cefd7588f0d40feadcb"}, - {file = "safetensors-0.4.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ae2f67f04ed0bb2e56fd380a8bd3eef03f609df53f88b6f5c7e89c08e52aae00"}, - {file = "safetensors-0.4.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c8ed5d2c04cdc1afc6b3c28d59580448ac07732c50d94c15e14670f9c473a2ce"}, - {file = "safetensors-0.4.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:2b6a2814278b6660261aa9a9aae524616de9f1ec364e3716d219b6ed8f91801f"}, - {file = "safetensors-0.4.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3cfd1ca35eacc635f0eaa894e5c5ed83ffebd0f95cac298fd430014fa7323631"}, - {file = "safetensors-0.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4177b456c6b0c722d82429127b5beebdaf07149d265748e97e0a34ff0b3694c8"}, - {file = "safetensors-0.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:313e8472197bde54e3ec54a62df184c414582979da8f3916981b6a7954910a1b"}, - {file = "safetensors-0.4.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fdb4adb76e21bad318210310590de61c9f4adcef77ee49b4a234f9dc48867869"}, - {file = "safetensors-0.4.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1d568628e9c43ca15eb96c217da73737c9ccb07520fafd8a1eba3f2750614105"}, - {file = "safetensors-0.4.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:573b6023a55a2f28085fc0a84e196c779b6cbef4d9e73acea14c8094fee7686f"}, - {file = "safetensors-0.4.1.tar.gz", hash = "sha256:2304658e6ada81a5223225b4efe84748e760c46079bffedf7e321763cafb36c9"}, -] - -[package.extras] -all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] -dev = ["safetensors[all]"] -jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] -numpy = ["numpy (>=1.21.6)"] -paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] -pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] -quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] -tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] -testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"] -torch = 
["safetensors[numpy]", "torch (>=1.10)"] - [[package]] name = "setuptools" version = "70.0.0" @@ -2770,137 +1598,6 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\"" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] -[[package]] -name = "sympy" -version = "1.12" -description = "Computer algebra system (CAS) in Python" -optional = true -python-versions = ">=3.8" -files = [ - {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, - {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, -] - -[package.dependencies] -mpmath = ">=0.19" - -[[package]] -name = "tokenizers" -version = "0.19.1" -description = "" -optional = true -python-versions = ">=3.7" -files = [ - {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"}, - {file = "tokenizers-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82c8b8063de6c0468f08e82c4e198763e7b97aabfe573fd4cf7b33930ca4df77"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f03727225feaf340ceeb7e00604825addef622d551cbd46b7b775ac834c1e1c4"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:453e4422efdfc9c6b6bf2eae00d5e323f263fff62b29a8c9cd526c5003f3f642"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02e81bf089ebf0e7f4df34fa0207519f07e66d8491d963618252f2e0729e0b46"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b07c538ba956843833fee1190cf769c60dc62e1cf934ed50d77d5502194d63b1"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28cab1582e0eec38b1f38c1c1fb2e56bce5dc180acb1724574fc5f47da2a4fe"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b01afb7193d47439f091cd8f070a1ced347ad0f9144952a30a41836902fe09e"}, - {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7fb297edec6c6841ab2e4e8f357209519188e4a59b557ea4fafcf4691d1b4c98"}, - {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2e8a3dd055e515df7054378dc9d6fa8c8c34e1f32777fb9a01fea81496b3f9d3"}, - {file = "tokenizers-0.19.1-cp310-none-win32.whl", hash = "sha256:7ff898780a155ea053f5d934925f3902be2ed1f4d916461e1a93019cc7250837"}, - {file = "tokenizers-0.19.1-cp310-none-win_amd64.whl", hash = "sha256:bea6f9947e9419c2fda21ae6c32871e3d398cba549b93f4a65a2d369662d9403"}, - {file = "tokenizers-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5c88d1481f1882c2e53e6bb06491e474e420d9ac7bdff172610c4f9ad3898059"}, - {file = "tokenizers-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddf672ed719b4ed82b51499100f5417d7d9f6fb05a65e232249268f35de5ed14"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dadc509cc8a9fe460bd274c0e16ac4184d0958117cf026e0ea8b32b438171594"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfedf31824ca4915b511b03441784ff640378191918264268e6923da48104acc"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:ac11016d0a04aa6487b1513a3a36e7bee7eec0e5d30057c9c0408067345c48d2"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76951121890fea8330d3a0df9a954b3f2a37e3ec20e5b0530e9a0044ca2e11fe"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b342d2ce8fc8d00f376af068e3274e2e8649562e3bc6ae4a67784ded6b99428d"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d16ff18907f4909dca9b076b9c2d899114dd6abceeb074eca0c93e2353f943aa"}, - {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:706a37cc5332f85f26efbe2bdc9ef8a9b372b77e4645331a405073e4b3a8c1c6"}, - {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16baac68651701364b0289979ecec728546133e8e8fe38f66fe48ad07996b88b"}, - {file = "tokenizers-0.19.1-cp311-none-win32.whl", hash = "sha256:9ed240c56b4403e22b9584ee37d87b8bfa14865134e3e1c3fb4b2c42fafd3256"}, - {file = "tokenizers-0.19.1-cp311-none-win_amd64.whl", hash = "sha256:ad57d59341710b94a7d9dbea13f5c1e7d76fd8d9bcd944a7a6ab0b0da6e0cc66"}, - {file = "tokenizers-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:621d670e1b1c281a1c9698ed89451395d318802ff88d1fc1accff0867a06f153"}, - {file = "tokenizers-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d924204a3dbe50b75630bd16f821ebda6a5f729928df30f582fb5aade90c818a"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f3fefdc0446b1a1e6d81cd4c07088ac015665d2e812f6dbba4a06267d1a2c95"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9620b78e0b2d52ef07b0d428323fb34e8ea1219c5eac98c2596311f20f1f9266"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04ce49e82d100594715ac1b2ce87d1a36e61891a91de774755f743babcd0dd52"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5c2ff13d157afe413bf7e25789879dd463e5a4abfb529a2d8f8473d8042e28f"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3174c76efd9d08f836bfccaca7cfec3f4d1c0a4cf3acbc7236ad577cc423c840"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9d5b6c0e7a1e979bec10ff960fae925e947aab95619a6fdb4c1d8ff3708ce3"}, - {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a179856d1caee06577220ebcfa332af046d576fb73454b8f4d4b0ba8324423ea"}, - {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:952b80dac1a6492170f8c2429bd11fcaa14377e097d12a1dbe0ef2fb2241e16c"}, - {file = "tokenizers-0.19.1-cp312-none-win32.whl", hash = "sha256:01d62812454c188306755c94755465505836fd616f75067abcae529c35edeb57"}, - {file = "tokenizers-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:b70bfbe3a82d3e3fb2a5e9b22a39f8d1740c96c68b6ace0086b39074f08ab89a"}, - {file = "tokenizers-0.19.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:bb9dfe7dae85bc6119d705a76dc068c062b8b575abe3595e3c6276480e67e3f1"}, - {file = "tokenizers-0.19.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:1f0360cbea28ea99944ac089c00de7b2e3e1c58f479fb8613b6d8d511ce98267"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:71e3ec71f0e78780851fef28c2a9babe20270404c921b756d7c532d280349214"}, - {file 
= "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b82931fa619dbad979c0ee8e54dd5278acc418209cc897e42fac041f5366d626"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8ff5b90eabdcdaa19af697885f70fe0b714ce16709cf43d4952f1f85299e73a"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e742d76ad84acbdb1a8e4694f915fe59ff6edc381c97d6dfdd054954e3478ad4"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8c5d59d7b59885eab559d5bc082b2985555a54cda04dda4c65528d90ad252ad"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b2da5c32ed869bebd990c9420df49813709e953674c0722ff471a116d97b22d"}, - {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:638e43936cc8b2cbb9f9d8dde0fe5e7e30766a3318d2342999ae27f68fdc9bd6"}, - {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:78e769eb3b2c79687d9cb0f89ef77223e8e279b75c0a968e637ca7043a84463f"}, - {file = "tokenizers-0.19.1-cp37-none-win32.whl", hash = "sha256:72791f9bb1ca78e3ae525d4782e85272c63faaef9940d92142aa3eb79f3407a3"}, - {file = "tokenizers-0.19.1-cp37-none-win_amd64.whl", hash = "sha256:f3bbb7a0c5fcb692950b041ae11067ac54826204318922da754f908d95619fbc"}, - {file = "tokenizers-0.19.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:07f9295349bbbcedae8cefdbcfa7f686aa420be8aca5d4f7d1ae6016c128c0c5"}, - {file = "tokenizers-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10a707cc6c4b6b183ec5dbfc5c34f3064e18cf62b4a938cb41699e33a99e03c1"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6309271f57b397aa0aff0cbbe632ca9d70430839ca3178bf0f06f825924eca22"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad23d37d68cf00d54af184586d79b84075ada495e7c5c0f601f051b162112dc"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:427c4f0f3df9109314d4f75b8d1f65d9477033e67ffaec4bca53293d3aca286d"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e83a31c9cf181a0a3ef0abad2b5f6b43399faf5da7e696196ddd110d332519ee"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c27b99889bd58b7e301468c0838c5ed75e60c66df0d4db80c08f43462f82e0d3"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bac0b0eb952412b0b196ca7a40e7dce4ed6f6926489313414010f2e6b9ec2adf"}, - {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6298bde623725ca31c9035a04bf2ef63208d266acd2bed8c2cb7d2b7d53ce6"}, - {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:08a44864e42fa6d7d76d7be4bec62c9982f6f6248b4aa42f7302aa01e0abfd26"}, - {file = "tokenizers-0.19.1-cp38-none-win32.whl", hash = "sha256:1de5bc8652252d9357a666e609cb1453d4f8e160eb1fb2830ee369dd658e8975"}, - {file = "tokenizers-0.19.1-cp38-none-win_amd64.whl", hash = "sha256:0bcce02bf1ad9882345b34d5bd25ed4949a480cf0e656bbd468f4d8986f7a3f1"}, - {file = "tokenizers-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0b9394bd204842a2a1fd37fe29935353742be4a3460b6ccbaefa93f58a8df43d"}, - {file = "tokenizers-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:4692ab92f91b87769d950ca14dbb61f8a9ef36a62f94bad6c82cc84a51f76f6a"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6258c2ef6f06259f70a682491c78561d492e885adeaf9f64f5389f78aa49a051"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c85cf76561fbd01e0d9ea2d1cbe711a65400092bc52b5242b16cfd22e51f0c58"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670b802d4d82bbbb832ddb0d41df7015b3e549714c0e77f9bed3e74d42400fbe"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85aa3ab4b03d5e99fdd31660872249df5e855334b6c333e0bc13032ff4469c4a"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbf001afbbed111a79ca47d75941e9e5361297a87d186cbfc11ed45e30b5daba"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c89aa46c269e4e70c4d4f9d6bc644fcc39bb409cb2a81227923404dd6f5227"}, - {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:39c1ec76ea1027438fafe16ecb0fb84795e62e9d643444c1090179e63808c69d"}, - {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c2a0d47a89b48d7daa241e004e71fb5a50533718897a4cd6235cb846d511a478"}, - {file = "tokenizers-0.19.1-cp39-none-win32.whl", hash = "sha256:61b7fe8886f2e104d4caf9218b157b106207e0f2a4905c9c7ac98890688aabeb"}, - {file = "tokenizers-0.19.1-cp39-none-win_amd64.whl", hash = "sha256:f97660f6c43efd3e0bfd3f2e3e5615bf215680bad6ee3d469df6454b8c6e8256"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b11853f17b54c2fe47742c56d8a33bf49ce31caf531e87ac0d7d13d327c9334"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d26194ef6c13302f446d39972aaa36a1dda6450bc8949f5eb4c27f51191375bd"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e8d1ed93beda54bbd6131a2cb363a576eac746d5c26ba5b7556bc6f964425594"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca407133536f19bdec44b3da117ef0d12e43f6d4b56ac4c765f37eca501c7bda"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce05fde79d2bc2e46ac08aacbc142bead21614d937aac950be88dc79f9db9022"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:35583cd46d16f07c054efd18b5d46af4a2f070a2dd0a47914e66f3ff5efb2b1e"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b4399b59d1af5645bcee2072a463318114c39b8547437a7c2d6a186a1b5a0e2d"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6852c5b2a853b8b0ddc5993cd4f33bfffdca4fcc5d52f89dd4b8eada99379285"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd266ae85c3d39df2f7e7d0e07f6c41a55e9a3123bb11f854412952deacd828"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecb2651956eea2aa0a2d099434134b1b68f1c31f9a5084d6d53f08ed43d45ff2"}, - {file = 
"tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b279ab506ec4445166ac476fb4d3cc383accde1ea152998509a94d82547c8e2a"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:89183e55fb86e61d848ff83753f64cded119f5d6e1f553d14ffee3700d0a4a49"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2edbc75744235eea94d595a8b70fe279dd42f3296f76d5a86dde1d46e35f574"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:0e64bfde9a723274e9a71630c3e9494ed7b4c0f76a1faacf7fe294cd26f7ae7c"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b5ca92bfa717759c052e345770792d02d1f43b06f9e790ca0a1db62838816f3"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f8a20266e695ec9d7a946a019c1d5ca4eddb6613d4f466888eee04f16eedb85"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c38f45d8f2a2ec0f3a20073cccb335b9f99f73b3c69483cd52ebc75369d8a1"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dd26e3afe8a7b61422df3176e06664503d3f5973b94f45d5c45987e1cb711876"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:eddd5783a4a6309ce23432353cdb36220e25cbb779bfa9122320666508b44b88"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:56ae39d4036b753994476a1b935584071093b55c7a72e3b8288e68c313ca26e7"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f9939ca7e58c2758c01b40324a59c034ce0cebad18e0d4563a9b1beab3018243"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c330c0eb815d212893c67a032e9dc1b38a803eccb32f3e8172c19cc69fbb439"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec11802450a2487cdf0e634b750a04cbdc1c4d066b97d94ce7dd2cb51ebb325b"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b718f316b596f36e1dae097a7d5b91fc5b85e90bf08b01ff139bd8953b25af"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ed69af290c2b65169f0ba9034d1dc39a5db9459b32f1dd8b5f3f32a3fcf06eab"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f8a9c828277133af13f3859d1b6bf1c3cb6e9e1637df0e45312e6b7c2e622b1f"}, - {file = "tokenizers-0.19.1.tar.gz", hash = "sha256:ee59e6680ed0fdbe6b724cf38bd70400a0c1dd623b07ac729087270caeac88e3"}, -] - -[package.dependencies] -huggingface-hub = ">=0.16.4,<1.0" - -[package.extras] -dev = ["tokenizers[testing]"] -docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] -testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] - [[package]] name = "toml" version = "0.10.2" @@ -2934,64 +1631,6 @@ files = [ {file = "tomlkit-0.12.3.tar.gz", hash = "sha256:75baf5012d06501f07bee5bf8e801b9f343e7aac5a92581f20f80ce632e6b5a4"}, ] -[[package]] -name = "torch" -version = "2.2.0" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = true -python-versions = ">=3.8.0" -files = [ - {file = "torch-2.2.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d366158d6503a3447e67f8c0ad1328d54e6c181d88572d688a625fac61b13a97"}, - {file = 
"torch-2.2.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:707f2f80402981e9f90d0038d7d481678586251e6642a7a6ef67fc93511cb446"}, - {file = "torch-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:15c8f0a105c66b28496092fca1520346082e734095f8eaf47b5786bac24b8a31"}, - {file = "torch-2.2.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:0ca4df4b728515ad009b79f5107b00bcb2c63dc202d991412b9eb3b6a4f24349"}, - {file = "torch-2.2.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:3d3eea2d5969b9a1c9401429ca79efc668120314d443d3463edc3289d7f003c7"}, - {file = "torch-2.2.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0d1c580e379c0d48f0f0a08ea28d8e373295aa254de4f9ad0631f9ed8bc04c24"}, - {file = "torch-2.2.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9328e3c1ce628a281d2707526b4d1080eae7c4afab4f81cea75bde1f9441dc78"}, - {file = "torch-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:03c8e660907ac1b8ee07f6d929c4e15cd95be2fb764368799cca02c725a212b8"}, - {file = "torch-2.2.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:da0cefe7f84ece3e3b56c11c773b59d1cb2c0fd83ddf6b5f7f1fd1a987b15c3e"}, - {file = "torch-2.2.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f81d23227034221a4a4ff8ef24cc6cec7901edd98d9e64e32822778ff01be85e"}, - {file = "torch-2.2.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:dcbfb2192ac41ca93c756ebe9e2af29df0a4c14ee0e7a0dd78f82c67a63d91d4"}, - {file = "torch-2.2.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:9eeb42971619e24392c9088b5b6d387d896e267889d41d267b1fec334f5227c5"}, - {file = "torch-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:c718b2ca69a6cac28baa36d86d8c0ec708b102cebd1ceb1b6488e404cd9be1d1"}, - {file = "torch-2.2.0-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:f11d18fceb4f9ecb1ac680dde7c463c120ed29056225d75469c19637e9f98d12"}, - {file = "torch-2.2.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:ee1da852bfd4a7e674135a446d6074c2da7194c1b08549e31eae0b3138c6b4d2"}, - {file = "torch-2.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0d819399819d0862268ac531cf12a501c253007df4f9e6709ede8a0148f1a7b8"}, - {file = "torch-2.2.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:08f53ccc38c49d839bc703ea1b20769cc8a429e0c4b20b56921a9f64949bf325"}, - {file = "torch-2.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:93bffe3779965a71dab25fc29787538c37c5d54298fd2f2369e372b6fb137d41"}, - {file = "torch-2.2.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:c17ec323da778efe8dad49d8fb534381479ca37af1bfc58efdbb8607a9d263a3"}, - {file = "torch-2.2.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c02685118008834e878f676f81eab3a952b7936fa31f474ef8a5ff4b5c78b36d"}, - {file = "torch-2.2.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:d9f39d6f53cec240a0e3baa82cb697593340f9d4554cee6d3d6ca07925c2fac0"}, - {file = "torch-2.2.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:51770c065206250dc1222ea7c0eff3f88ab317d3e931cca2aee461b85fbc2472"}, - {file = "torch-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:008e4c6ad703de55af760c73bf937ecdd61a109f9b08f2bbb9c17e7c7017f194"}, - {file = "torch-2.2.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:de8680472dd14e316f42ceef2a18a301461a9058cd6e99a1f1b20f78f11412f1"}, - {file = "torch-2.2.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:99e1dcecb488e3fd25bcaac56e48cdb3539842904bdc8588b0b255fde03a254c"}, -] - -[package.dependencies] -filelock = "*" -fsspec = "*" -jinja2 = "*" -networkx = "*" -nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == 
\"x86_64\""} -nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nccl-cu12 = {version = "2.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -sympy = "*" -triton = {version = "2.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -typing-extensions = ">=4.8.0" - -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.9.1)"] - [[package]] name = "tqdm" version = "4.66.2" @@ -3012,95 +1651,6 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] -[[package]] -name = "transformers" -version = "4.41.2" -description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" -optional = true -python-versions = ">=3.8.0" -files = [ - {file = "transformers-4.41.2-py3-none-any.whl", hash = "sha256:05555d20e43f808de1ef211ab64803cdb513170cef70d29a888b589caebefc67"}, - {file = "transformers-4.41.2.tar.gz", hash = "sha256:80a4db216533d573e9cc7388646c31ed9480918feb7c55eb211249cb23567f87"}, -] - -[package.dependencies] -filelock = "*" -huggingface-hub = ">=0.23.0,<1.0" -numpy = ">=1.17" -packaging = ">=20.0" -pyyaml = ">=5.1" -regex = "!=2019.12.17" -requests = "*" -safetensors = ">=0.4.1" -tokenizers = ">=0.19,<0.20" -tqdm = ">=4.27" - -[package.extras] -accelerate = ["accelerate (>=0.21.0)"] -agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"] -all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision"] -audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -codecarbon = ["codecarbon (==1.2.0)"] -deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "optuna", 
"parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.19,<0.20)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] -flax-speech = 
["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -ftfy = ["ftfy"] -integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"] -ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] -modelcreation = ["cookiecutter (==1.7.3)"] -natten = ["natten (>=0.14.6,<0.15.0)"] -onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] -onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] -optuna = ["optuna"] -quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<2.0.0)"] -ray = ["ray[tune] (>=2.7.0)"] -retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] -sagemaker = ["sagemaker (>=2.31.0)"] -sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] -serving = ["fastapi", "pydantic", "starlette", "uvicorn"] -sigopt = ["sigopt"] -sklearn = ["scikit-learn"] -speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] -tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] -tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -timm = ["timm"] -tokenizers = ["tokenizers (>=0.19,<0.20)"] -torch = ["accelerate (>=0.21.0)", "torch"] -torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.23.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.19,<0.20)", "torch", "tqdm (>=4.27)"] -video = ["av (==9.2.0)", "decord (==0.6.0)"] -vision = ["Pillow (>=10.0.1,<=15.0)"] - -[[package]] -name = "triton" -version = "2.2.0" -description = "A language and compiler for custom Deep Learning operations" -optional = true -python-versions = "*" -files = [ - {file = "triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2294514340cfe4e8f4f9e5c66c702744c4a117d25e618bd08469d0bfed1e2e5"}, - {file = "triton-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da58a152bddb62cafa9a857dd2bc1f886dbf9f9c90a2b5da82157cd2b34392b0"}, - {file = "triton-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af58716e721460a61886668b205963dc4d1e4ac20508cc3f623aef0d70283d5"}, - {file = "triton-2.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8fe46d3ab94a8103e291bd44c741cc294b91d1d81c1a2888254cbf7ff846dab"}, - {file = "triton-2.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ce26093e539d727e7cf6f6f0d932b1ab0574dc02567e684377630d86723ace"}, - {file 
= "triton-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:227cc6f357c5efcb357f3867ac2a8e7ecea2298cd4606a8ba1e931d1d5a947df"}, -] - -[package.dependencies] -filelock = "*" - -[package.extras] -build = ["cmake (>=3.20)", "lit"] -tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"] -tutorials = ["matplotlib", "pandas", "tabulate", "torch"] - [[package]] name = "typer" version = "0.9.0" @@ -3216,17 +1766,6 @@ files = [ {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, ] -[[package]] -name = "tzdata" -version = "2024.1" -description = "Provider of IANA time zone data" -optional = true -python-versions = ">=2" -files = [ - {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, - {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, -] - [[package]] name = "urllib3" version = "2.0.7" @@ -3342,123 +1881,6 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] -[[package]] -name = "xxhash" -version = "3.4.1" -description = "Python binding for xxHash" -optional = true -python-versions = ">=3.7" -files = [ - {file = "xxhash-3.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f"}, - {file = "xxhash-3.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2"}, - {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb11628470a6004dc71a09fe90c2f459ff03d611376c1debeec2d648f44cb693"}, - {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bef2a7dc7b4f4beb45a1edbba9b9194c60a43a89598a87f1a0226d183764189"}, - {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c0f7b2d547d72c7eda7aa817acf8791f0146b12b9eba1d4432c531fb0352228"}, - {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00f2fdef6b41c9db3d2fc0e7f94cb3db86693e5c45d6de09625caad9a469635b"}, - {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23cfd9ca09acaf07a43e5a695143d9a21bf00f5b49b15c07d5388cadf1f9ce11"}, - {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6a9ff50a3cf88355ca4731682c168049af1ca222d1d2925ef7119c1a78e95b3b"}, - {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f1d7c69a1e9ca5faa75546fdd267f214f63f52f12692f9b3a2f6467c9e67d5e7"}, - {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:672b273040d5d5a6864a36287f3514efcd1d4b1b6a7480f294c4b1d1ee1b8de0"}, - {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4178f78d70e88f1c4a89ff1ffe9f43147185930bb962ee3979dba15f2b1cc799"}, - {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9804b9eb254d4b8cc83ab5a2002128f7d631dd427aa873c8727dba7f1f0d1c2b"}, - {file = "xxhash-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c09c49473212d9c87261d22c74370457cfff5db2ddfc7fd1e35c80c31a8c14ce"}, - {file = "xxhash-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:ebbb1616435b4a194ce3466d7247df23499475c7ed4eb2681a1fa42ff766aff6"}, - {file = 
"xxhash-3.4.1-cp310-cp310-win_arm64.whl", hash = "sha256:25dc66be3db54f8a2d136f695b00cfe88018e59ccff0f3b8f545869f376a8a46"}, - {file = "xxhash-3.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58c49083801885273e262c0f5bbeac23e520564b8357fbb18fb94ff09d3d3ea5"}, - {file = "xxhash-3.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b526015a973bfbe81e804a586b703f163861da36d186627e27524f5427b0d520"}, - {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36ad4457644c91a966f6fe137d7467636bdc51a6ce10a1d04f365c70d6a16d7e"}, - {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:248d3e83d119770f96003271fe41e049dd4ae52da2feb8f832b7a20e791d2920"}, - {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2070b6d5bbef5ee031666cf21d4953c16e92c2f8a24a94b5c240f8995ba3b1d0"}, - {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2746035f518f0410915e247877f7df43ef3372bf36cfa52cc4bc33e85242641"}, - {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a8ba6181514681c2591840d5632fcf7356ab287d4aff1c8dea20f3c78097088"}, - {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aac5010869240e95f740de43cd6a05eae180c59edd182ad93bf12ee289484fa"}, - {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4cb11d8debab1626181633d184b2372aaa09825bde709bf927704ed72765bed1"}, - {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b29728cff2c12f3d9f1d940528ee83918d803c0567866e062683f300d1d2eff3"}, - {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a15cbf3a9c40672523bdb6ea97ff74b443406ba0ab9bca10ceccd9546414bd84"}, - {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e66df260fed01ed8ea790c2913271641c58481e807790d9fca8bfd5a3c13844"}, - {file = "xxhash-3.4.1-cp311-cp311-win32.whl", hash = "sha256:e867f68a8f381ea12858e6d67378c05359d3a53a888913b5f7d35fbf68939d5f"}, - {file = "xxhash-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:200a5a3ad9c7c0c02ed1484a1d838b63edcf92ff538770ea07456a3732c577f4"}, - {file = "xxhash-3.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:1d03f1c0d16d24ea032e99f61c552cb2b77d502e545187338bea461fde253583"}, - {file = "xxhash-3.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c4bbba9b182697a52bc0c9f8ec0ba1acb914b4937cd4a877ad78a3b3eeabefb3"}, - {file = "xxhash-3.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9fd28a9da300e64e434cfc96567a8387d9a96e824a9be1452a1e7248b7763b78"}, - {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6066d88c9329ab230e18998daec53d819daeee99d003955c8db6fc4971b45ca3"}, - {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93805bc3233ad89abf51772f2ed3355097a5dc74e6080de19706fc447da99cd3"}, - {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64da57d5ed586ebb2ecdde1e997fa37c27fe32fe61a656b77fabbc58e6fbff6e"}, - {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a97322e9a7440bf3c9805cbaac090358b43f650516486746f7fa482672593df"}, - {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:bbe750d512982ee7d831838a5dee9e9848f3fb440e4734cca3f298228cc957a6"}, - {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fd79d4087727daf4d5b8afe594b37d611ab95dc8e29fe1a7517320794837eb7d"}, - {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:743612da4071ff9aa4d055f3f111ae5247342931dedb955268954ef7201a71ff"}, - {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b41edaf05734092f24f48c0958b3c6cbaaa5b7e024880692078c6b1f8247e2fc"}, - {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:a90356ead70d715fe64c30cd0969072de1860e56b78adf7c69d954b43e29d9fa"}, - {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac56eebb364e44c85e1d9e9cc5f6031d78a34f0092fea7fc80478139369a8b4a"}, - {file = "xxhash-3.4.1-cp312-cp312-win32.whl", hash = "sha256:911035345932a153c427107397c1518f8ce456f93c618dd1c5b54ebb22e73747"}, - {file = "xxhash-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:f31ce76489f8601cc7b8713201ce94b4bd7b7ce90ba3353dccce7e9e1fee71fa"}, - {file = "xxhash-3.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:b5beb1c6a72fdc7584102f42c4d9df232ee018ddf806e8c90906547dfb43b2da"}, - {file = "xxhash-3.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6d42b24d1496deb05dee5a24ed510b16de1d6c866c626c2beb11aebf3be278b9"}, - {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b685fab18876b14a8f94813fa2ca80cfb5ab6a85d31d5539b7cd749ce9e3624"}, - {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419ffe34c17ae2df019a4685e8d3934d46b2e0bbe46221ab40b7e04ed9f11137"}, - {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e041ce5714f95251a88670c114b748bca3bf80cc72400e9f23e6d0d59cf2681"}, - {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc860d887c5cb2f524899fb8338e1bb3d5789f75fac179101920d9afddef284b"}, - {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:312eba88ffe0a05e332e3a6f9788b73883752be63f8588a6dc1261a3eaaaf2b2"}, - {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e01226b6b6a1ffe4e6bd6d08cfcb3ca708b16f02eb06dd44f3c6e53285f03e4f"}, - {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9f3025a0d5d8cf406a9313cd0d5789c77433ba2004b1c75439b67678e5136537"}, - {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:6d3472fd4afef2a567d5f14411d94060099901cd8ce9788b22b8c6f13c606a93"}, - {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:43984c0a92f06cac434ad181f329a1445017c33807b7ae4f033878d860a4b0f2"}, - {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a55e0506fdb09640a82ec4f44171273eeabf6f371a4ec605633adb2837b5d9d5"}, - {file = "xxhash-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:faec30437919555b039a8bdbaba49c013043e8f76c999670aef146d33e05b3a0"}, - {file = "xxhash-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:c9e1b646af61f1fc7083bb7b40536be944f1ac67ef5e360bca2d73430186971a"}, - {file = "xxhash-3.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:961d948b7b1c1b6c08484bbce3d489cdf153e4122c3dfb07c2039621243d8795"}, - {file = "xxhash-3.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:719a378930504ab159f7b8e20fa2aa1896cde050011af838af7e7e3518dd82de"}, - {file = 
"xxhash-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74fb5cb9406ccd7c4dd917f16630d2e5e8cbbb02fc2fca4e559b2a47a64f4940"}, - {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5dab508ac39e0ab988039bc7f962c6ad021acd81fd29145962b068df4148c476"}, - {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c59f3e46e7daf4c589e8e853d700ef6607afa037bfad32c390175da28127e8c"}, - {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cc07256eff0795e0f642df74ad096f8c5d23fe66bc138b83970b50fc7f7f6c5"}, - {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9f749999ed80f3955a4af0eb18bb43993f04939350b07b8dd2f44edc98ffee9"}, - {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7688d7c02149a90a3d46d55b341ab7ad1b4a3f767be2357e211b4e893efbaaf6"}, - {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a8b4977963926f60b0d4f830941c864bed16aa151206c01ad5c531636da5708e"}, - {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8106d88da330f6535a58a8195aa463ef5281a9aa23b04af1848ff715c4398fb4"}, - {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4c76a77dbd169450b61c06fd2d5d436189fc8ab7c1571d39265d4822da16df22"}, - {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:11f11357c86d83e53719c592021fd524efa9cf024dc7cb1dfb57bbbd0d8713f2"}, - {file = "xxhash-3.4.1-cp38-cp38-win32.whl", hash = "sha256:0c786a6cd74e8765c6809892a0d45886e7c3dc54de4985b4a5eb8b630f3b8e3b"}, - {file = "xxhash-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:aabf37fb8fa27430d50507deeab2ee7b1bcce89910dd10657c38e71fee835594"}, - {file = "xxhash-3.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6127813abc1477f3a83529b6bbcfeddc23162cece76fa69aee8f6a8a97720562"}, - {file = "xxhash-3.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef2e194262f5db16075caea7b3f7f49392242c688412f386d3c7b07c7733a70a"}, - {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71be94265b6c6590f0018bbf73759d21a41c6bda20409782d8117e76cd0dfa8b"}, - {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10e0a619cdd1c0980e25eb04e30fe96cf8f4324758fa497080af9c21a6de573f"}, - {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa122124d2e3bd36581dd78c0efa5f429f5220313479fb1072858188bc2d5ff1"}, - {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17032f5a4fea0a074717fe33477cb5ee723a5f428de7563e75af64bfc1b1e10"}, - {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca7783b20e3e4f3f52f093538895863f21d18598f9a48211ad757680c3bd006f"}, - {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d77d09a1113899fad5f354a1eb4f0a9afcf58cefff51082c8ad643ff890e30cf"}, - {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:21287bcdd299fdc3328cc0fbbdeaa46838a1c05391264e51ddb38a3f5b09611f"}, - {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:dfd7a6cc483e20b4ad90224aeb589e64ec0f31e5610ab9957ff4314270b2bf31"}, - {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:543c7fcbc02bbb4840ea9915134e14dc3dc15cbd5a30873a7a5bf66039db97ec"}, 
- {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fe0a98d990e433013f41827b62be9ab43e3cf18e08b1483fcc343bda0d691182"}, - {file = "xxhash-3.4.1-cp39-cp39-win32.whl", hash = "sha256:b9097af00ebf429cc7c0e7d2fdf28384e4e2e91008130ccda8d5ae653db71e54"}, - {file = "xxhash-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:d699b921af0dcde50ab18be76c0d832f803034d80470703700cb7df0fbec2832"}, - {file = "xxhash-3.4.1-cp39-cp39-win_arm64.whl", hash = "sha256:2be491723405e15cc099ade1280133ccfbf6322d2ef568494fb7d07d280e7eee"}, - {file = "xxhash-3.4.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:431625fad7ab5649368c4849d2b49a83dc711b1f20e1f7f04955aab86cd307bc"}, - {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc6dbd5fc3c9886a9e041848508b7fb65fd82f94cc793253990f81617b61fe49"}, - {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ff8dbd0ec97aec842476cb8ccc3e17dd288cd6ce3c8ef38bff83d6eb927817"}, - {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef73a53fe90558a4096e3256752268a8bdc0322f4692ed928b6cd7ce06ad4fe3"}, - {file = "xxhash-3.4.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:450401f42bbd274b519d3d8dcf3c57166913381a3d2664d6609004685039f9d3"}, - {file = "xxhash-3.4.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a162840cf4de8a7cd8720ff3b4417fbc10001eefdd2d21541a8226bb5556e3bb"}, - {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b736a2a2728ba45017cb67785e03125a79d246462dfa892d023b827007412c52"}, - {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0ae4c2e7698adef58710d6e7a32ff518b66b98854b1c68e70eee504ad061d8"}, - {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6322c4291c3ff174dcd104fae41500e75dad12be6f3085d119c2c8a80956c51"}, - {file = "xxhash-3.4.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:dd59ed668801c3fae282f8f4edadf6dc7784db6d18139b584b6d9677ddde1b6b"}, - {file = "xxhash-3.4.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92693c487e39523a80474b0394645b393f0ae781d8db3474ccdcead0559ccf45"}, - {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4603a0f642a1e8d7f3ba5c4c25509aca6a9c1cc16f85091004a7028607ead663"}, - {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa45e8cbfbadb40a920fe9ca40c34b393e0b067082d94006f7f64e70c7490a6"}, - {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:595b252943b3552de491ff51e5bb79660f84f033977f88f6ca1605846637b7c6"}, - {file = "xxhash-3.4.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:562d8b8f783c6af969806aaacf95b6c7b776929ae26c0cd941d54644ea7ef51e"}, - {file = "xxhash-3.4.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:41ddeae47cf2828335d8d991f2d2b03b0bdc89289dc64349d712ff8ce59d0647"}, - {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c44d584afdf3c4dbb3277e32321d1a7b01d6071c1992524b6543025fb8f4206f"}, - {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:fd7bddb3a5b86213cc3f2c61500c16945a1b80ecd572f3078ddbbe68f9dabdfb"}, - {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ecb6c987b62437c2f99c01e97caf8d25660bf541fe79a481d05732e5236719c"}, - {file = "xxhash-3.4.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:696b4e18b7023527d5c50ed0626ac0520edac45a50ec7cf3fc265cd08b1f4c03"}, - {file = "xxhash-3.4.1.tar.gz", hash = "sha256:0379d6cf1ff987cd421609a264ce025e74f346e3e145dd106c0cc2e3ec3f99a9"}, -] - [[package]] name = "yarl" version = "1.9.4" @@ -3577,10 +1999,7 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] -[extras] -mllib = ["accelerate", "datasets", "einops", "h5py", "peft", "safetensors", "transformers"] - [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "cef02f25d2bdc395f2187bf7b01eabd560ba18e597bf50005dd90b80fa25336c" +content-hash = "59be54627e27caf3aa6e089881036b45a65705fcd5f31c9165ddc203930d526d" diff --git a/pyproject.toml b/pyproject.toml index dc20e960..a6246f77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "friendli-client" -version = "1.4.2" +version = "1.5.0" description = "Client of Friendli Suite." license = "Apache-2.0" authors = ["FriendliAI teams "] @@ -34,16 +34,9 @@ rich = "^12.2.0" jsonschema = "^4.17.3" tqdm = "^4.48.0" pydantic = {extras = ["email"], version = ">=1.9.0, <3"} -transformers = { version = "4.41.2", optional = true } -h5py = { version = "^3.9.0", optional = true } -einops = { version = "^0.6.1", optional = true } -accelerate = { version = "0.21.0", optional = true } -datasets = { version = "2.16.0", optional = true } injector = "^0.21.0" protobuf = "^5.26.1" types-protobuf = "^5.26.0.20240422" -peft = { version = "0.6.0", optional = true } -safetensors = { version = "0.4.1", optional = true } httpx = "^0.24.1" fastapi = "^0.104.0" uvicorn = "^0.23.2" @@ -75,9 +68,6 @@ types-toml = "^0.10.8.6" types-tqdm = "^4.65.0.1" typer = "^0.9.0" -[tool.poetry.extras] -mllib = ["transformers", "h5py", "accelerate", "einops", "datasets", "peft", "safetensors"] - [tool.isort] profile = "black" known_local_folder = ["tests"] @@ -122,12 +112,6 @@ disable = [ ] extension-pkg-whitelist = "pydantic" -[tool.pylint.TYPECHECK] -generated-members = [ - "numpy.*" , - "torch.*" -] - [tool.pylint.check] ignored-classes = "Depends" ignore-patterns = [ diff --git a/tests/unit_tests/modules/__init__.py b/tests/unit_tests/modules/__init__.py deleted file mode 100644 index 1fc4d985..00000000 --- a/tests/unit_tests/modules/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. diff --git a/tests/unit_tests/modules/conftest.py b/tests/unit_tests/modules/conftest.py deleted file mode 100644 index f79aaa6a..00000000 --- a/tests/unit_tests/modules/conftest.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -from __future__ import annotations - -from typing import Any, Dict - -import pytest -from peft import PeftConfig -from transformers import ( - AutoConfig, - BlenderbotConfig, - BloomConfig, - CodeGenConfig, - FalconConfig, - GPT2Config, - GPTJConfig, - GPTNeoXConfig, - LlamaConfig, - MistralConfig, - MixtralConfig, - MptConfig, - OPTConfig, - T5Config, -) -from transformers.models.mpt.configuration_mpt import MptAttentionConfig - -from friendli.enums import ModelDataType -from friendli.modules.converter.base import OneOfConverter -from friendli.modules.converter.maps import get_hf_converter_factory -from friendli.modules.converter.models.mixtral import MixtralForCausalLMConverter -from friendli.modules.converter.utils import get_model_arch - -from tests.unit_tests.modules.helpers.utils import ModelConfig, get_param_specs - -model_name_config_map = { - "blenderbot": BlenderbotConfig( - architectures=["BlenderbotForConditionalGeneration"], - activation_function="gelu", - tie_word_embeddings=True, - decoder_attention_heads=32, - encoder_attention_heads=32, - decoder_ffn_dim=10240, - encoder_ffn_dim=10240, - encoder_layers=1, - decoder_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "bloom": BloomConfig( - architectures=["BloomForCausalLM"], - apply_residual_connection_post_layernorm=False, - slow_but_exact=False, - tie_word_embeddings=True, - layer_norm_epsilon=1e-5, - n_layer=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "codegen": CodeGenConfig( - architectures=["CodeGenForCausalLM"], - activation_function="gelu", - tie_word_embeddings=False, - layer_norm_epsilon=1e-5, - n_layer=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "falcon_7b": FalconConfig( # falcon-7b - architectures=["FalconForCausalLM"], - alibi=False, - bias=False, - new_decoder_architecture=False, - parallel_attn=True, - num_hidden_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "falcon": FalconConfig( # falcon-40b - architectures=["FalconForCausalLM"], - alibi=False, - bias=False, - new_decoder_architecture=True, - num_hidden_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "gpt_neox": GPTNeoXConfig( # pythia-1.4b - architectures=["GPTNeoXForCausalLM"], - hidden_act="gelu", - use_parallel_residual=True, - tie_word_embeddings=False, - layer_norm_eps=1e-5, - rotary_emb_base=10000, - num_hidden_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "gpt": GPT2Config( - architectures=["GPT2LMHeadModel"], - activation_function="gelu", - scale_attn_by_inverse_layer_idx=False, - tie_word_embeddings=True, - layer_norm_epsilon=1e-5, - n_layer=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "gpt_j": GPTJConfig( # gpt-j-6b - architectures=["GPTJForCausalLM"], - tie_word_embeddings=False, - layer_norm_epsilon=1e-5, - n_layer=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "llama": LlamaConfig( - architectures=["LlamaForCausalLM"], - hidden_act="silu", - tie_word_embeddings=False, - rms_norm_eps=1e-5, - num_hidden_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "mpt": MptConfig( - architectures=["MPTForCausalLM"], - attn_config=MptAttentionConfig( - alibi=True, - alibi_bias_max=8, - attn_type="multihead_attention", - prefix_lm=False, - qk_ln=False, - softmax_scale=None, - ), - expansion_ratio=4, - no_bias=True, - logit_scale=None, - n_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "opt": OPTConfig( - architectures=["OPTForCausalLM"], - 
activation_function="relu", - do_layer_norm_before=True, - word_embed_proj_dim=768, - hidden_size=768, - _remove_first_dropout=False, - tie_word_embeddings=True, - num_hidden_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "t5_v1_1": T5Config( - architectures=["T5ForConditionalGeneration"], - is_gated_act=True, - tie_word_embeddings=False, - num_hidden_layers=1, - num_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - relative_attention_num_buckets=32, # fixed value for t5 - ), - "t5": T5Config( - architectures=["T5ForConditionalGeneration"], - is_gated_act=False, - tie_word_embeddings=True, - layer_norm_epsilon=1e-6, - num_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - relative_attention_num_buckets=32, # fixed value for t5 - ), - "mistral": MistralConfig( # same as llama architecture - architectures=["MistralForCausalLM"], - hidden_act="silu", - tie_word_embeddings=False, - rope_theta=10000.0, - rms_norm_eps=1e-5, - num_hidden_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - "mixtral": MixtralConfig( # same as llama architecture - architectures=["MixtralForCausalLM"], - hidden_act="silu", - tie_word_embeddings=False, - rope_theta=10000.0, - rms_norm_eps=1e-5, - num_hidden_layers=1, - vocab_size=10000, - max_position_embeddings=1024, - ), - # TODO: add phi_msft - # TODO: add mpt with grouped querry attention (e.g. replit-code) -} - - -@pytest.fixture -def converter(model_config: AutoConfig) -> OneOfConverter: - model_arch = get_model_arch(model_config) - _, converter_cls = get_hf_converter_factory(model_arch) - return converter_cls(model_config, None, ModelDataType.FP16) - - -# TODO: add render_model_config per model -@pytest.fixture -def render_model_config(converter: OneOfConverter) -> ModelConfig: - return ModelConfig( - dtype="float16", - num_decoder_layers=converter.decoder_layer_num, - hidden_size=converter.decoder_hidden_size, - num_heads=converter.decoder_num_attention_heads, - num_kv_heads=converter.decoder_num_kv_attention_heads, - head_size=converter.decoder_head_size, - num_encoder_layers=converter.decoder_layer_num, # same as decoder for test - ff_intermediate_size=converter.decoder_ff_intermediate_size, - num_experts=converter.num_experts - if isinstance(converter, MixtralForCausalLMConverter) - else None, - ) - - -@pytest.fixture -def spec_data(model_name: str, render_model_config: ModelConfig) -> Dict[str, Any]: - param_specs = get_param_specs(model_name, "models", render_model_config) - return param_specs diff --git a/tests/unit_tests/modules/helpers/__init__.py b/tests/unit_tests/modules/helpers/__init__.py deleted file mode 100644 index 1fc4d985..00000000 --- a/tests/unit_tests/modules/helpers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. diff --git a/tests/unit_tests/modules/helpers/spec.py b/tests/unit_tests/modules/helpers/spec.py deleted file mode 100644 index 127d8d59..00000000 --- a/tests/unit_tests/modules/helpers/spec.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. 
- -"""Model spec utils""" - -from __future__ import annotations - -from enum import Enum -from pathlib import Path -from typing import Any, Dict, List, Tuple, Union - -import numpy as np -import yaml -from jinja2.environment import Template as JinjaTemplate -from pydantic import BaseModel - -from friendli.utils.compat import model_parse - - -class InvalidSpecFormatError(Exception): - """Invalid model spec format that can be handled by users.""" - - -class SpecNodeType(str, Enum): - """Model spec node type.""" - - DATA = "data" - GROUP = "group" - REPEAT_GROUP = "repeat_group" - - -class ParamInfo(BaseModel): - """Parameter info.""" - - name: str - dtype: np.dtype - shape: Tuple[int, ...] - - class Config: - arbitrary_types_allowed = ( - True # for np.dtype only check `isinstance(dtype, np.dtype)` - ) - - @classmethod - def load(cls, name: str, data: Dict[str, Any]) -> ParamInfo: - """Load a param info from data. - - Args: - name (str): Name of parameter. - data (dict[str, Any]): A dictionary describing the parameter info. - - Raises: - InvalidSpecFormatError: Raised if required key does not exist in data. - - Returns: - ParamInfo: Loaded param info. - - """ - try: - dtype = np.dtype(data["dtype"]) - return ParamInfo( - name=name, - dtype=dtype, - shape=tuple(map(int, data["shape"])), - ) - except (KeyError, AttributeError, TypeError) as exc: - raise InvalidSpecFormatError from exc - - -class RepeatRange(BaseModel): - """Repeat group's repeat range.""" - - lo: int - hi: int - - -class Template: - """Renderable YAML template.""" - - def __init__(self, jinja_template: JinjaTemplate): - self._jinja2_template = jinja_template - - @classmethod - def from_file(cls, path: Union[str, Path]) -> Template: - with open(path, "r") as f: - return cls(jinja_template=JinjaTemplate(f.read())) - - def render(self, **kwargs) -> Dict[str, Any] | List[Dict[str, Any]]: - """Render a Jinja2-YAML template with filling the variables. - - Returns: - dict[str, Any] | list[dict[str, Any]]: Rendered template in JSON format. - - """ - return yaml.safe_load(self._jinja2_template.render(**kwargs)) - - -class ModelSpecParser: - """Model spec parser""" - - def __init__(self, model_spec: Dict[str, Any]) -> None: - """Intialize model spec parser. - - Args: - model_spec (dict[str, Any]): A dictionary describing the entire model spec. - - """ - self._model_spec = model_spec - - def get_all_param_info(self) -> Dict[ParamInfo]: - """Get all parameter info specified in the model spec. - - Returns: - list[ParamInfo]: A list of param info. - - """ - return self._get_param_info(self._model_spec) - - def _get_param_info( - self, spec: Dict[str, Any], name_prefix: str = "" - ) -> Dict[ParamInfo]: - """Get a dictionary of param info in recursion. - - Args: - spec (dict[str, Any]): Full or partial model spec. - name_prefix (str, optional): Parsed name until the current recursion step. Defaults to "". - - Returns: - Dict[ParamInfo]: A dictionary of param info. 
- - """ - try: - node_type = spec["type"] - except KeyError as exc: - raise InvalidSpecFormatError from exc - - if node_type == SpecNodeType.DATA: - return {name_prefix: ParamInfo.load(name=name_prefix, data=spec)} - if node_type == SpecNodeType.GROUP: - res = {} - for child_name, child_spec in spec.items(): - if child_name == "type": - continue - res.update( - self._get_param_info( - spec=child_spec, - name_prefix=f"{name_prefix}/{child_name}" - if name_prefix - else child_name, - ) - ) - return res - if node_type == SpecNodeType.REPEAT_GROUP: - try: - repeat_range = model_parse(RepeatRange, spec["range"]) # type: ignore - except KeyError as exc: - raise InvalidSpecFormatError from exc - res = {} - - for i in range(repeat_range.lo, repeat_range.hi + 1): - for child_name, child_spec in spec.items(): - if child_name in ["type", "range"]: - continue - res.update( - self._get_param_info( - spec=child_spec, - name_prefix=f"{name_prefix.replace('*', str(i))}/{child_name}" - if name_prefix - else child_name, - ) - ) - return res - raise InvalidSpecFormatError diff --git a/tests/unit_tests/modules/helpers/utils.py b/tests/unit_tests/modules/helpers/utils.py deleted file mode 100644 index 1de9b23d..00000000 --- a/tests/unit_tests/modules/helpers/utils.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -from __future__ import annotations - -import os -from dataclasses import fields -from typing import Dict, Optional -from unittest.mock import Mock - -import numpy as np -import torch -from accelerate import init_empty_weights -from peft import PeftConfig, PeftModel -from pydantic import BaseModel -from transformers import PretrainedConfig - -from friendli.enums import ModelDataType -from friendli.modules.converter.maps import ( - get_adapter_converter_factory, - get_hf_converter_factory, -) -from friendli.modules.converter.utils import get_model_arch -from friendli.modules.quantizer.awq.base import AWQQuantizer -from friendli.modules.quantizer.layers import ( - WeightActQuantizedLinearLayer, - WeightOnlyQuantizedLinearLayer, -) -from friendli.modules.quantizer.schema.config import AWQConfig -from friendli.modules.quantizer.schema.data import QuantInput -from friendli.modules.quantizer.smoothquant.base import SmoothQuantQuantizer -from friendli.utils.compat import model_dump - -from tests.unit_tests.modules.helpers.spec import ModelSpecParser, ParamInfo, Template - -SPEC_PATH_PREFIX = os.path.join( - os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "specs/" -) - - -class ModelConfig(BaseModel): - """Adjustable model config.""" - - dtype: str - num_decoder_layers: int - hidden_size: int - num_encoder_layers: Optional[int] = None - ff_intermediate_size: Optional[int] = None - num_heads: Optional[int] = None - num_kv_heads: Optional[int] = None - head_size: Optional[int] = None - seq_len: Optional[int] = 1024 - vocab_size: Optional[int] = 10000 - num_experts: Optional[int] = 8 - - -class LoraAdapterConfig(ModelConfig): - """Adjustable model config.""" - - lora_rank_dim: int - - -class AWQModelConfig(ModelConfig): - """Adjustable model config for AWQ.""" - - group_size: int = 1 - q_dtype: str = "int8" - - -class SmoothQuantModelConfig(ModelConfig): - """Adjustable model config for SmoothQuant.""" - - attn_fc_smoothing: bool = False - ff2_smoothing: bool = False - q_dtype: str = "int8" - - -def get_numpy_data_type(data_type: ModelDataType) -> np.dtype: - if data_type == ModelDataType.FP32: - return np.float32 - elif data_type == 
ModelDataType.FP16: - return np.float16 - elif data_type == ModelDataType.BF16: - return np.uint32 - else: - return np.int8 - - -def get_param_specs( - model_name: str, spec_folder: str, model_config: ModelConfig -) -> Dict[str, ParamInfo]: - file_path = f"{SPEC_PATH_PREFIX}{spec_folder}/{model_name}.yaml" - template = Template.from_file(file_path) - render_config = model_dump(model_config) - rendered = template.render(**render_config) - assert isinstance(rendered, dict) - parser = ModelSpecParser(model_spec=rendered) - param_specs = parser.get_all_param_info() - return param_specs - - -def get_meta_model( - model_config: PretrainedConfig, -) -> torch.nn.Module: - model_arch = get_model_arch(model_config) - model_factory, _ = get_hf_converter_factory(model_arch) - with init_empty_weights(): - model = model_factory(config=model_config) - return model - - -def get_meta_model_with_adapter( - model_config: PretrainedConfig, adapter_config: PeftConfig -) -> torch.nn.Module: - model_arch = get_model_arch(model_config) - model_factory, _ = get_hf_converter_factory(model_arch) - with init_empty_weights(): - model = model_factory(config=model_config) - PeftModel(model, adapter_config) - return model - - -def get_smoothquant_quantized_meta_model( - model_config: PretrainedConfig, quantizer: SmoothQuantQuantizer -): - model = get_meta_model(model_config) - model = quantizer.hook.pre_smooth(model).to("meta") - - def weight_act_quant_layer(quant_input: QuantInput): - weight, start, end = ( - quant_input.weight, - quant_input.start_offset, - quant_input.end_offset, - ) - weight = weight[start:end] - return WeightActQuantizedLinearLayer( # meta quantized linear layer - in_features=weight.size(1), - out_features=weight.size(0), - q_weight=weight, - weight_scale=torch.zeros(weight.size(1), device="meta"), - act_scale=torch.zeros(weight.size(1), device="meta"), - ) - - for tf_quant_input in quantizer.hook.iter_tf_quant_inputs(model): - for field in fields(tf_quant_input): - quant_input = getattr(tf_quant_input, field.name) - if isinstance(quant_input, QuantInput): - weight_act_quant_layer = Mock(side_effect=weight_act_quant_layer) - q_layer = weight_act_quant_layer(quant_input) - tf_quant_input.block.add_module(field.name, q_layer) - - return model - - -def get_awq_quantized_meta_model( - model_config: PretrainedConfig, quantizer: AWQQuantizer, quant_config: AWQConfig -): - model = get_meta_model(model_config) - model = quantizer.hook.add_pre_scaler(model).to("meta") - - def weight_act_quant_layer(quant_input: QuantInput): - weight, start, end = ( - quant_input.weight, - quant_input.start_offset, - quant_input.end_offset, - ) - w = weight[start:end] - out_dim = w.size(0) - in_dim = w.size(1) - num_groups = in_dim // quant_config.awq_args.quant_group_size - return WeightOnlyQuantizedLinearLayer( # meta quantized linear layer - in_features=in_dim, - out_features=out_dim, - q_weight=w, - weight_scale=torch.zeros((num_groups, out_dim), device="meta"), - zeros=torch.zeros((num_groups, out_dim), device="meta"), - ) - - for tf_quant_input in quantizer.hook.iter_tf_quant_inputs(model): - for field in fields(tf_quant_input): - quant_input = getattr(tf_quant_input, field.name) - if isinstance(quant_input, QuantInput): - weight_only_quantzer = Mock(side_effect=weight_act_quant_layer) - q_layer = weight_only_quantzer(quant_input) - tf_quant_input.block.add_module(field.name, q_layer) - - return model diff --git a/tests/unit_tests/modules/specs/awq/gpt_j.yaml b/tests/unit_tests/modules/specs/awq/gpt_j.yaml deleted file 
mode 100644 index 21ee18a5..00000000 --- a/tests/unit_tests/modules/specs/awq/gpt_j.yaml +++ /dev/null @@ -1,162 +0,0 @@ -# Jinja2 template to validate GPT-J model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 3 | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 4 | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - awq: - pre_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 // group_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/awq/gpt_neox.yaml b/tests/unit_tests/modules/specs/awq/gpt_neox.yaml deleted file mode 100644 index ca93fd0f..00000000 --- a/tests/unit_tests/modules/specs/awq/gpt_neox.yaml +++ /dev/null @@ -1,175 +0,0 @@ -# Jinja2 template to validate GPT-NeoX model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 3 | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - awq: - type: group - pre_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 4 | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - awq: - type: group - pre_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 // group_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/awq/llama.yaml b/tests/unit_tests/modules/specs/awq/llama.yaml deleted file mode 100644 index 71984acc..00000000 --- a/tests/unit_tests/modules/specs/awq/llama.yaml +++ /dev/null @@ -1,157 +0,0 @@ -# Jinja2 template to validate LLaMA model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - c_proj: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - c_gate: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size // group_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/awq/mistral.yaml b/tests/unit_tests/modules/specs/awq/mistral.yaml deleted file mode 100644 index 71984acc..00000000 --- a/tests/unit_tests/modules/specs/awq/mistral.yaml +++ /dev/null @@ -1,157 +0,0 @@ -# Jinja2 template to validate LLaMA model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - c_proj: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - c_gate: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size // group_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/awq/mpt.yaml b/tests/unit_tests/modules/specs/awq/mpt.yaml deleted file mode 100644 index 9a1c736f..00000000 --- a/tests/unit_tests/modules/specs/awq/mpt.yaml +++ /dev/null @@ -1,137 +0,0 @@ -# Jinja2 template to validate MPT model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 3 | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - awq: - type: group - pre_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - awq: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 4 | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size // group_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - awq: - type: group - pre_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - scale:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 // group_size | int }} - - {{ hidden_size | int }} - zero:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 // group_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/lora/llama.yaml b/tests/unit_tests/modules/specs/lora/llama.yaml deleted file mode 100644 index 74d9de3c..00000000 --- a/tests/unit_tests/modules/specs/lora/llama.yaml +++ /dev/null @@ -1,110 +0,0 @@ -# Jinja2 template to validate Llama model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - lora: - type: group - query_A: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ lora_rank_dim | int }} - query_B: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ lora_rank_dim | int }} - - {{ num_heads * head_size | int }} - key_A: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ lora_rank_dim | int }} - key_B: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ lora_rank_dim | int }} - - {{ num_kv_heads * head_size | int }} - c_proj: - type: group - lora: - type: group - lora_A: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ num_heads * head_size | int }} - - {{ lora_rank_dim | int }} - lora_B: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ lora_rank_dim | int }} - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - lora: - type: group - lora_A: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ lora_rank_dim | int }} - lora_B: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ lora_rank_dim | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - lora: - type: group - lora_A: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ lora_rank_dim | int }} - lora_B: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ lora_rank_dim | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/lora/mpt.yaml b/tests/unit_tests/modules/specs/lora/mpt.yaml deleted file mode 100644 index 4bd1083d..00000000 --- a/tests/unit_tests/modules/specs/lora/mpt.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Jinja2 template to validate MPT model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - lora: - type: group - lora_A: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ lora_rank_dim | int}} - lora_B: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ lora_rank_dim | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - diff --git a/tests/unit_tests/modules/specs/models/ phi_msft.yaml b/tests/unit_tests/modules/specs/models/ phi_msft.yaml deleted file mode 100644 index 15d7f42b..00000000 --- a/tests/unit_tests/modules/specs/models/ phi_msft.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# Jinja2 template to validate phi-msft model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ((num_kv_heads * 2 + num_heads) * head_size | int )}} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int )}} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/blenderbot.yaml b/tests/unit_tests/modules/specs/models/blenderbot.yaml deleted file mode 100644 index 02dc7d93..00000000 --- a/tests/unit_tests/modules/specs/models/blenderbot.yaml +++ /dev/null @@ -1,243 +0,0 @@ -# Jinja2 template to validate Blenderbot model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - cross_attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 3 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ seq_len | int }} - - {{ hidden_size | int }} -encoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_encoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: {{ dtype 
}} - shape: - - {{ seq_len | int }} - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/bloom.yaml b/tests/unit_tests/modules/specs/models/bloom.yaml deleted file mode 100644 index cb5539f9..00000000 --- a/tests/unit_tests/modules/specs/models/bloom.yaml +++ /dev/null @@ -1,113 +0,0 @@ -# Jinja2 template to validate Bloom model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -wte: - type: group - ln: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/codegen.yaml b/tests/unit_tests/modules/specs/models/codegen.yaml deleted file mode 100644 index 3e906ec4..00000000 --- a/tests/unit_tests/modules/specs/models/codegen.yaml +++ /dev/null @@ -1,101 +0,0 @@ -# Jinja2 template to validate Codegen model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/falcon.yaml b/tests/unit_tests/modules/specs/models/falcon.yaml deleted file mode 100644 index cb723f76..00000000 --- a/tests/unit_tests/modules/specs/models/falcon.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# Jinja2 template to validate Falcon model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/falcon_7b.yaml b/tests/unit_tests/modules/specs/models/falcon_7b.yaml deleted file mode 100644 index ca85b0a4..00000000 --- a/tests/unit_tests/modules/specs/models/falcon_7b.yaml +++ /dev/null @@ -1,86 +0,0 @@ -# Jinja2 template to validate Falcon 7B model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/gpt.yaml b/tests/unit_tests/modules/specs/models/gpt.yaml deleted file mode 100644 index a8e6ff2e..00000000 --- a/tests/unit_tests/modules/specs/models/gpt.yaml +++ /dev/null @@ -1,109 +0,0 @@ -# Jinja2 template to validate GPT model in Friendli format. 
- -type: group -decoder: - type: group - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ seq_len | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/gpt_j.yaml b/tests/unit_tests/modules/specs/models/gpt_j.yaml deleted file mode 100644 index 3417f790..00000000 --- a/tests/unit_tests/modules/specs/models/gpt_j.yaml +++ /dev/null @@ -1,101 +0,0 @@ -# Jinja2 template to validate GPT-J model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/gpt_neox.yaml b/tests/unit_tests/modules/specs/models/gpt_neox.yaml deleted file mode 100644 index 93341f45..00000000 --- a/tests/unit_tests/modules/specs/models/gpt_neox.yaml +++ /dev/null @@ -1,109 +0,0 @@ -# Jinja2 template to validate GPT-NeoX model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/llama.yaml b/tests/unit_tests/modules/specs/models/llama.yaml deleted file mode 100644 index d0f2266e..00000000 --- a/tests/unit_tests/modules/specs/models/llama.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Jinja2 template to validate LLaMA model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_gate: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/mistral.yaml b/tests/unit_tests/modules/specs/models/mistral.yaml deleted file mode 100644 index d0f2266e..00000000 --- a/tests/unit_tests/modules/specs/models/mistral.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Jinja2 template to validate LLaMA model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_gate: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/mixtral.yaml b/tests/unit_tests/modules/specs/models/mixtral.yaml deleted file mode 100644 index d0d79b01..00000000 --- a/tests/unit_tests/modules/specs/models/mixtral.yaml +++ /dev/null @@ -1,102 +0,0 @@ -# Jinja2 template to validate LLaMA model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - moe: - type: group - router: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int}} - - {{ num_experts | int }} - '*': - type: repeat_group - range: - lo: 0 - hi: {{ num_experts - 1 | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_gate: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/mpt.yaml b/tests/unit_tests/modules/specs/models/mpt.yaml deleted file mode 100644 index 701c56d2..00000000 --- a/tests/unit_tests/modules/specs/models/mpt.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# Jinja2 template to validate MPT model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/opt.yaml b/tests/unit_tests/modules/specs/models/opt.yaml deleted file mode 100644 index 2bc76839..00000000 --- a/tests/unit_tests/modules/specs/models/opt.yaml +++ /dev/null @@ -1,117 +0,0 @@ -# Jinja2 template to validate OPT model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ seq_len | int }} - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/t5.yaml b/tests/unit_tests/modules/specs/models/t5.yaml deleted file mode 100644 index 3f7b88fb..00000000 --- a/tests/unit_tests/modules/specs/models/t5.yaml +++ /dev/null @@ -1,165 +0,0 @@ -# Jinja2 template to validate T5 (t5-v1_1) model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - cross_attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 3 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: float32 - shape: - - {{ 32 | int }} - - {{ num_heads | int }} -encoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_encoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: float32 - shape: - - {{ 32 | int }} - - {{ num_heads | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/models/t5_v1_1.yaml b/tests/unit_tests/modules/specs/models/t5_v1_1.yaml deleted file mode 100644 index 3b99f73c..00000000 --- a/tests/unit_tests/modules/specs/models/t5_v1_1.yaml +++ /dev/null @@ -1,189 +0,0 @@ -# Jinja2 template to validate T5 (t5-v1_1) model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - cross_attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 3 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_gate: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: float32 - shape: - - {{ 32 | int }} - - {{ num_heads | int }} -encoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_encoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 3 | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_gate: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - c_proj: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: float32 - shape: - - {{ 32 | int }} - - {{ num_heads | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/bloom.yaml b/tests/unit_tests/modules/specs/smoothquant/bloom.yaml deleted file mode 100644 index c8a90282..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/bloom.yaml +++ /dev/null @@ -1,215 +0,0 @@ -# Jinja2 template to validate Bloom model in Friendli format. 
- -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - smoothquant: - type: group - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -wte: - type: group - ln: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git 
a/tests/unit_tests/modules/specs/smoothquant/codegen.yaml b/tests/unit_tests/modules/specs/smoothquant/codegen.yaml deleted file mode 100644 index 87013ae5..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/codegen.yaml +++ /dev/null @@ -1,215 +0,0 @@ -# Jinja2 template to validate Codegen model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - ln_2: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - bias:0: - type: data - dtype: 
{{ dtype }} - shape: - - {{ vocab_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/falcon.yaml b/tests/unit_tests/modules/specs/smoothquant/falcon.yaml deleted file mode 100644 index 91b828ac..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/falcon.yaml +++ /dev/null @@ -1,191 +0,0 @@ -# Jinja2 template to validate Falcon model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - 
- {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/falcon_7b.yaml b/tests/unit_tests/modules/specs/smoothquant/falcon_7b.yaml deleted file mode 100644 index 0570e118..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/falcon_7b.yaml +++ /dev/null @@ -1,188 +0,0 @@ -# Jinja2 template to validate Falcon 7B model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ num_heads * head_size | int }} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ num_kv_heads * head_size | int }} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ num_kv_heads * head_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} 
-wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/gpt.yaml b/tests/unit_tests/modules/specs/smoothquant/gpt.yaml deleted file mode 100644 index a57b3952..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/gpt.yaml +++ /dev/null @@ -1,211 +0,0 @@ -# Jinja2 template to validate GPT model in Friendli format. - -type: group -decoder: - type: group - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - 
in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - wpe: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ seq_len | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/gpt_j.yaml b/tests/unit_tests/modules/specs/smoothquant/gpt_j.yaml deleted file mode 100644 index dad7a61e..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/gpt_j.yaml +++ /dev/null @@ -1,215 +0,0 @@ -# Jinja2 template to validate GPT-J model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_1: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - ln_2: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ 
hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/gpt_neox.yaml b/tests/unit_tests/modules/specs/smoothquant/gpt_neox.yaml deleted file mode 100644 index 08230d20..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/gpt_neox.yaml +++ /dev/null @@ -1,211 +0,0 @@ -# Jinja2 template to validate GPT-NeoX model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - 
shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/llama.yaml b/tests/unit_tests/modules/specs/smoothquant/llama.yaml deleted file mode 100644 index f29d2f4e..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/llama.yaml +++ /dev/null @@ -1,206 +0,0 @@ -# Jinja2 template to validate LLaMA model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ (num_kv_heads * 2 + num_heads) * head_size | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ num_heads * head_size | int }} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ num_kv_heads * head_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ num_kv_heads * head_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ ff_intermediate_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_gate: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ ff_intermediate_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ 
hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ ff_intermediate_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - smoothquant: - type: group - {% if ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ ff_intermediate_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ ff_intermediate_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ ff_intermediate_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ ff_intermediate_size | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/mpt.yaml b/tests/unit_tests/modules/specs/smoothquant/mpt.yaml deleted file mode 100644 index 1d520f2c..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/mpt.yaml +++ /dev/null @@ -1,173 +0,0 @@ -# Jinja2 template to validate MPT model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - smoothquant: - type: group - {% if 
ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - ln_f: - type: group - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/specs/smoothquant/opt.yaml b/tests/unit_tests/modules/specs/smoothquant/opt.yaml deleted file mode 100644 index cb76b1f8..00000000 --- a/tests/unit_tests/modules/specs/smoothquant/opt.yaml +++ /dev/null @@ -1,219 +0,0 @@ -# Jinja2 template to validate OPT model in Friendli format. - -type: group -decoder: - type: group - h_._*: - type: repeat_group - range: - lo: 0 - hi: {{ num_decoder_layers - 1 | int }} - attn: - type: group - c_attn: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 3 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 3 | int }} - - {{ hidden_size | int }} - q_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - q_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - k_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - v_out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int}} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if attn_fc_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - ln_*: - type: repeat_group - range: - lo: 1 - hi: 2 - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - mlp: - type: group - c_fc: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size * 4 | int }} - smoothquant: - type: group - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size * 4 | int }} - - {{ hidden_size | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - c_proj: - type: group - bias:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - smoothquant: - type: group - {% if 
ff2_smoothing %} - smoothing_vector:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - {% endif %} - weight:0: - type: data - dtype: {{ q_dtype }} - shape: - - {{ hidden_size | int }} - - {{ hidden_size * 4 | int }} - weight_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - out_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size | int }} - in_scale:0: - type: data - dtype: float32 - shape: - - {{ hidden_size * 4 | int }} - ln_f: - type: group - beta:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - gamma:0: - type: data - dtype: {{ dtype }} - shape: - - {{ hidden_size | int }} - wpe: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ seq_len | int }} - - {{ hidden_size | int }} -head_fc: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} -wte: - type: group - weight:0: - type: data - dtype: {{ dtype }} - shape: - - {{ vocab_size | int }} - - {{ hidden_size | int }} diff --git a/tests/unit_tests/modules/test_awq.py b/tests/unit_tests/modules/test_awq.py deleted file mode 100644 index 6123b159..00000000 --- a/tests/unit_tests/modules/test_awq.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -from __future__ import annotations - -from typing import Any, Dict - -import pytest - -from friendli.modules.converter.base import OneOfConverter -from friendli.modules.converter.utils import get_tensor_from_state_dict -from friendli.modules.quantizer.maps import get_quantized_converter -from friendli.modules.quantizer.schema.config import AWQConfig - -from tests.unit_tests.modules.conftest import model_name_config_map -from tests.unit_tests.modules.helpers.utils import ( - AWQModelConfig, - get_awq_quantized_meta_model, - get_numpy_data_type, - get_param_specs, -) - -awq_models = ["gpt_j", "gpt_neox", "llama", "mpt", "mistral"] -awq_model_name_config_map = {} -for model_name, model_config in model_name_config_map.items(): - if model_name in awq_models: - awq_model_name_config_map[model_name] = model_config - - -@pytest.fixture -def quant_config() -> AWQConfig: - return AWQConfig() - - -@pytest.fixture -def render_awq_model_config( - converter: OneOfConverter, quant_config: AWQConfig -) -> AWQModelConfig: - return AWQModelConfig( - dtype="float16", - num_decoder_layers=converter.decoder_layer_num, - hidden_size=converter.decoder_hidden_size, - num_heads=converter.decoder_num_attention_heads, - num_kv_heads=converter.decoder_num_kv_attention_heads, - head_size=converter.decoder_head_size, - num_encoder_layers=converter.decoder_layer_num, # same as decoder for test - ff_intermediate_size=converter.decoder_ff_intermediate_size, - group_size=quant_config.awq_args.quant_group_size, - q_dtype="int8", - ) - - -@pytest.fixture -def awq_spec_data( - model_name: str, render_awq_model_config: AWQModelConfig -) -> Dict[str, Any]: - param_specs = get_param_specs(model_name, "awq", render_awq_model_config) - return param_specs - - -@pytest.mark.parametrize( - "model_config", - awq_model_name_config_map.values(), -) -def test_convert_info_list_match_hf_state_dict( - converter: OneOfConverter, quant_config: AWQConfig -): - quantizer = get_quantized_converter(quant_config, converter) - convert_info_list = quantizer.get_convert_info_list() - assert len(convert_info_list) != 0 - quantized_model = get_awq_quantized_meta_model( - converter.config, quantizer, quant_config - ) - 
state_dict = quantized_model.state_dict() - for convert_info in convert_info_list: - param_names = convert_info.param_names - for param_name in param_names: - assert param_name in state_dict - - -@pytest.mark.parametrize( - "model_name, model_config", - awq_model_name_config_map.items(), -) -def test_quantized_model_match_spec( - converter: OneOfConverter, awq_spec_data: Dict[str, Any], quant_config: AWQConfig -): - quantizer = get_quantized_converter(quant_config, converter) - quantized_model = get_awq_quantized_meta_model( - converter.config, quantizer, quant_config - ) - state_dict = quantized_model.state_dict() - convert_info_list = quantizer.get_convert_info_list() - for convert_info in convert_info_list: - converted_name, reshape_fn, param_names, data_type = ( - convert_info.converted_name, - convert_info.reshape_fn, - convert_info.param_names, - convert_info.data_type, - ) - assert awq_spec_data[converted_name].dtype == get_numpy_data_type( - data_type - ), f"data type mismatch for {converted_name}: {param_names}" - params = [ - get_tensor_from_state_dict(state_dict, param_name) - for param_name in param_names - ] - reshaped_tensor = reshape_fn(params) - assert ( - awq_spec_data[converted_name].shape == reshaped_tensor.shape - ), f"shape mismatch for {converted_name}: {param_names}" diff --git a/tests/unit_tests/modules/test_converter.py b/tests/unit_tests/modules/test_converter.py deleted file mode 100644 index 3214c533..00000000 --- a/tests/unit_tests/modules/test_converter.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -from __future__ import annotations - -from typing import Any, Dict - -import pytest - -from friendli.modules.converter.base import OneOfConverter -from friendli.modules.converter.utils import get_tensor_from_state_dict - -from tests.unit_tests.modules.conftest import model_name_config_map -from tests.unit_tests.modules.helpers.utils import get_meta_model, get_numpy_data_type - - -@pytest.mark.parametrize( - "model_config", - model_name_config_map.values(), -) -def test_convert_info_list_match_hf_state_dict(converter: OneOfConverter): - convert_info_list = converter.get_convert_info_list() - assert len(convert_info_list) != 0 - model = get_meta_model(converter.config) - state_dict = model.state_dict() - for convert_info in convert_info_list: - param_names = convert_info.param_names - for param_name in param_names: - assert param_name in state_dict - - -@pytest.mark.parametrize( - "model_name, model_config", - model_name_config_map.items(), -) -def test_convert_info_list_match_spec( - converter: OneOfConverter, spec_data: Dict[str, Any] -): - convert_info_list = converter.get_convert_info_list() - assert len(convert_info_list) != 0 - converted_param_names = set() - for convert_info in convert_info_list: - converted_param_names.add(convert_info.converted_name) - - spec_converted_param_names = set(spec_data.keys()) - assert converted_param_names == spec_converted_param_names - - -@pytest.mark.parametrize( - "model_name, model_config", - model_name_config_map.items(), -) -def test_reshape_fn_match_spec(converter: OneOfConverter, spec_data: Dict[str, Any]): - convert_info_list = converter.get_convert_info_list() - model = get_meta_model(converter.config) - state_dict = model.state_dict() - for convert_info in convert_info_list: - converted_name, reshape_fn, param_names, data_type = ( - convert_info.converted_name, - convert_info.reshape_fn, - convert_info.param_names, - convert_info.data_type, - ) - assert 
spec_data[converted_name].dtype == get_numpy_data_type( - data_type - ), f"data type mismatch for {converted_name}: {param_names}" - params = [ - get_tensor_from_state_dict(state_dict, param_name) - for param_name in param_names - ] - reshaped_tensor = reshape_fn(params) - assert ( - spec_data[converted_name].shape == reshaped_tensor.shape - ), f"shape mismatch for {converted_name}: {param_names}" diff --git a/tests/unit_tests/modules/test_lora_adapter_converter.py b/tests/unit_tests/modules/test_lora_adapter_converter.py deleted file mode 100644 index e1626d09..00000000 --- a/tests/unit_tests/modules/test_lora_adapter_converter.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -from __future__ import annotations - -from typing import Any, Dict, cast - -import pytest -from peft import LoraConfig - -from friendli.modules.converter.base import DecoderOnlyConverter, OneOfConverter -from friendli.modules.converter.maps import get_adapter_converter_factory -from friendli.modules.converter.utils import get_model_arch, get_tensor_from_state_dict - -from tests.unit_tests.modules.conftest import model_name_config_map -from tests.unit_tests.modules.helpers.utils import ( - LoraAdapterConfig, - get_meta_model_with_adapter, - get_numpy_data_type, - get_param_specs, -) - -model_with_adpater = ["mpt", "llama"] -model_with_adpater_name_config_map = {} -for model_name, model_config in model_name_config_map.items(): - if model_name in model_with_adpater: - model_with_adpater_name_config_map[model_name] = model_config - - -@pytest.fixture -def adapter_config(converter: OneOfConverter) -> LoraConfig: - model_type = cast(DecoderOnlyConverter, converter).config.model_type - if model_type == "mpt": - return LoraConfig(target_modules=["Wqkv"]) - elif model_type == "llama": - return LoraConfig( - target_modules=["q_proj", "k_proj", "o_proj", "up_proj", "down_proj"] - ) - return LoraConfig() - - -@pytest.fixture -def render_lora_adapter_config( - converter: OneOfConverter, adapter_config: LoraConfig -) -> LoraAdapterConfig: - return LoraAdapterConfig( - dtype="float16", - num_decoder_layers=converter.decoder_layer_num, - hidden_size=converter.decoder_hidden_size, - num_heads=converter.decoder_num_attention_heads, - num_kv_heads=converter.decoder_num_kv_attention_heads, - head_size=converter.decoder_head_size, - num_encoder_layers=converter.decoder_layer_num, # same as decoder for test - ff_intermediate_size=converter.decoder_ff_intermediate_size, - lora_rank_dim=adapter_config.r, - ) - - -@pytest.fixture -def lora_spec_data( - model_name: str, render_lora_adapter_config: LoraAdapterConfig -) -> Dict[str, Any]: - param_specs = get_param_specs(model_name, "lora", render_lora_adapter_config) - return param_specs - - -@pytest.mark.parametrize( - "model_config", - model_with_adpater_name_config_map.values(), -) -def test_convert_info_list_match_hf_state_dict( - converter: OneOfConverter, - adapter_config: LoraConfig, -): - model_arch = get_model_arch(converter.config) - adapter_converter_cls = get_adapter_converter_factory(model_arch) - adapter_converter = adapter_converter_cls(converter, adapter_config) - - convert_info_list = adapter_converter.get_convert_info_list() - model_with_adapter = get_meta_model_with_adapter( - adapter_converter.converter.config, adapter_converter.adapter_config - ) - state_dict = model_with_adapter.state_dict() - for convert_info in convert_info_list: - param_names = convert_info.param_names - for param_name in param_names: - assert 
param_name in state_dict - - -@pytest.mark.parametrize( - "model_name, model_config", - model_with_adpater_name_config_map.items(), -) -def test_model_with_lora_match_spec( - converter: OneOfConverter, - lora_spec_data: Dict[str, Any], - adapter_config: LoraConfig, -): - model_arch = get_model_arch(converter.config) - adapter_converter_cls = get_adapter_converter_factory(model_arch) - adapter_converter = adapter_converter_cls(converter, adapter_config) - - convert_info_list = adapter_converter.get_convert_info_list() - model_with_adapter = get_meta_model_with_adapter( - adapter_converter.converter.config, adapter_converter.adapter_config - ) - state_dict = model_with_adapter.state_dict() - for convert_info in convert_info_list: - converted_name, reshape_fn, param_names, data_type = ( - convert_info.converted_name, - convert_info.reshape_fn, - convert_info.param_names, - convert_info.data_type, - ) - assert lora_spec_data[converted_name].dtype == get_numpy_data_type( - data_type - ), f"data type mismatch for {converted_name}: {param_names}" - params = [ - get_tensor_from_state_dict(state_dict, param_name) - for param_name in param_names - ] - reshaped_tensor = reshape_fn(params) - assert ( - lora_spec_data[converted_name].shape == reshaped_tensor.shape - ), f"shape mismatch for {converted_name}: {param_names}" diff --git a/tests/unit_tests/modules/test_smoothquant.py b/tests/unit_tests/modules/test_smoothquant.py deleted file mode 100644 index 06904bc5..00000000 --- a/tests/unit_tests/modules/test_smoothquant.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved. - -# # Copyright (c) 2022-present, FriendliAI Inc. All rights reserved. - -# from __future__ import annotations - -# from typing import Any, Dict - -# import pytest - -# from friendli.modules.converter.base import OneOfConverter -# from friendli.modules.converter.utils import get_tensor_from_state_dict -# from friendli.modules.quantizer.maps import get_quantized_converter -# from friendli.modules.quantizer.schema.config import SmoothQuantArgs, SmoothQuantConfig - -# from tests.unit_tests.modules.conftest import model_name_config_map -# from tests.unit_tests.modules.helpers.utils import ( -# SmoothQuantModelConfig, -# get_numpy_data_type, -# get_param_specs, -# get_smoothquant_quantized_meta_model, -# ) - -# smoothquant_models = [ -# "bloom", -# "codegen", -# "falcon", -# "falcon_7b", -# "gpt_j", -# "gpt_neox", -# "llama", -# "mpt", -# "opt", -# ] -# smoothquant_model_name_config_map = {} -# for model_name, model_config in model_name_config_map.items(): -# if model_name in smoothquant_models: -# smoothquant_model_name_config_map[model_name] = model_config - - -# @pytest.fixture -# def quant_config() -> SmoothQuantConfig: -# return SmoothQuantConfig( -# smoothquant_args=SmoothQuantArgs( -# attn_fc_smoothing=True, -# ff2_smoothing=True, -# ) -# ) - - -# @pytest.fixture -# def render_smoothquant_model_config( -# converter: OneOfConverter, quant_config: SmoothQuantConfig -# ) -> SmoothQuantModelConfig: -# return SmoothQuantModelConfig( -# dtype="float16", -# num_decoder_layers=converter.decoder_layer_num, -# hidden_size=converter.decoder_hidden_size, -# num_heads=converter.decoder_num_attention_heads, -# num_kv_heads=converter.decoder_num_kv_attention_heads, -# head_size=converter.decoder_head_size, -# num_encoder_layers=converter.decoder_layer_num, # same as decoder for test -# ff_intermediate_size=converter.decoder_ff_intermediate_size, -# 
attn_fc_smoothing=quant_config.smoothquant_args.attn_fc_smoothing, -# ff2_smoothing=quant_config.smoothquant_args.ff2_smoothing, -# q_dtype="int8", -# ) - - -# @pytest.fixture -# def smoothquant_spec_data( -# model_name: str, render_smoothquant_model_config: SmoothQuantModelConfig -# ) -> Dict[str, Any]: -# param_specs = get_param_specs( -# model_name, "smoothquant", render_smoothquant_model_config -# ) -# return param_specs - - -# @pytest.mark.parametrize( -# "model_config", -# smoothquant_model_name_config_map.values(), -# ) -# def test_convert_info_list_match_hf_state_dict( -# converter: OneOfConverter, quant_config: SmoothQuantConfig -# ): -# quantizer = get_quantized_converter(quant_config, converter) -# convert_info_list = quantizer.get_convert_info_list() -# assert len(convert_info_list) != 0 -# quantized_model = get_smoothquant_quantized_meta_model(converter.config, quantizer) -# state_dict = quantized_model.state_dict() -# for convert_info in convert_info_list: -# param_names = convert_info.param_names -# for param_name in param_names: -# assert param_name in state_dict - - -# @pytest.mark.parametrize( -# "model_name, model_config", -# smoothquant_model_name_config_map.items(), -# ) -# def test_quantized_model_match_spec( -# converter: OneOfConverter, -# smoothquant_spec_data: Dict[str, Any], -# quant_config: SmoothQuantConfig, -# ): -# quantizer = get_quantized_converter(quant_config, converter) -# quantized_model = get_smoothquant_quantized_meta_model(converter.config, quantizer) -# state_dict = quantized_model.state_dict() -# convert_info_list = quantizer.get_convert_info_list() -# for convert_info in convert_info_list: -# converted_name, reshape_fn, param_names, data_type = ( -# convert_info.converted_name, -# convert_info.reshape_fn, -# convert_info.param_names, -# convert_info.data_type, -# ) -# assert smoothquant_spec_data[converted_name].dtype == get_numpy_data_type( -# data_type -# ), f"data type mismatch for {converted_name}: {param_names}" -# params = [ -# get_tensor_from_state_dict(state_dict, param_name) -# for param_name in param_names -# ] -# reshaped_tensor = reshape_fn(params) -# assert ( -# smoothquant_spec_data[converted_name].shape == reshaped_tensor.shape -# ), f"shape mismatch for {converted_name}: {param_names}"
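For reference, the spec files removed above are Jinja2 templates that render into a nested YAML tree of `group`, `repeat_group`, and `data` nodes describing the expected dtype and shape of every converted parameter. Below is a minimal, hypothetical sketch (not part of this diff) of how such a template can be rendered and flattened into per-parameter expectations; the inline TEMPLATE and the flatten() helper are invented for illustration only, whereas the removed tests relied on project helpers such as get_param_specs from tests/unit_tests/modules/helpers/utils.py. Only standard jinja2 and PyYAML calls are assumed.

# Hypothetical sketch: render a parameter-spec template and flatten it into
# {param_name: (dtype, shape)} expectations. TEMPLATE and flatten() are
# illustrative stand-ins, not code from this repository.
from typing import Dict, Tuple

import jinja2
import yaml

TEMPLATE = """\
type: group
decoder:
  type: group
  h_._*:
    type: repeat_group
    range:
      lo: 0
      hi: {{ num_decoder_layers - 1 | int }}
    ln_1:
      type: group
      gamma:0:
        type: data
        dtype: {{ dtype }}
        shape:
        - {{ hidden_size | int }}
wte:
  type: group
  weight:0:
    type: data
    dtype: {{ dtype }}
    shape:
    - {{ vocab_size | int }}
    - {{ hidden_size | int }}
"""


def flatten(name: str, node: dict, out: Dict[str, Tuple[str, tuple]]) -> None:
    """Walk the rendered spec tree, expanding repeat_group indices over '*'."""
    node_type = node.get("type")
    if node_type == "data":
        # Leaf: record the expected dtype and shape for this parameter name.
        out[name] = (node["dtype"], tuple(node["shape"]))
        return
    if node_type == "repeat_group":
        # Repeat the child entries once per index, substituting '*' in the name.
        lo, hi = node["range"]["lo"], node["range"]["hi"]
        for i in range(lo, hi + 1):
            for key, child in node.items():
                if key in ("type", "range"):
                    continue
                flatten(f"{name.replace('*', str(i))}/{key}", child, out)
        return
    # Plain group node: recurse into every child mapping.
    for key, child in node.items():
        if key == "type":
            continue
        flatten(f"{name}/{key}" if name else key, child, out)


rendered = jinja2.Template(TEMPLATE).render(
    dtype="float16", num_decoder_layers=2, hidden_size=8, vocab_size=32
)
expected: Dict[str, Tuple[str, tuple]] = {}
flatten("", yaml.safe_load(rendered), expected)
assert expected["decoder/h_._0/ln_1/gamma:0"] == ("float16", (8,))
assert expected["wte/weight:0"] == ("float16", (32, 8))

A spec-based test would then compare each expected dtype and shape against the tensors actually produced for the converted parameter names, which is what the deleted test_converter.py and test_awq.py did through get_convert_info_list() and each entry's reshape_fn.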