diff --git a/README.md b/README.md
index 593f5872..a5a9d666 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,11 @@ Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-->
-
+
+
+
+
+
Supercharge Generative AI Serving with Friendli 🚀
diff --git a/friendli/cli/model.py b/friendli/cli/model.py
index dff7bba3..6a50bd50 100644
--- a/friendli/cli/model.py
+++ b/friendli/cli/model.py
@@ -6,25 +6,12 @@
from __future__ import annotations
-import os
-from typing import Optional, cast
-
import typer
-import yaml
-from friendli.enums import CheckpointFileType, ModelDataType
-from friendli.errors import (
- CheckpointConversionError,
- InvalidConfigError,
- NotFoundError,
- NotSupportedQuantConfigError,
- QuantizationError,
-)
from friendli.formatter import TableFormatter
from friendli.sdk.client import Friendli
-from friendli.utils.compat import model_dump, model_parse
+from friendli.utils.compat import model_dump
from friendli.utils.decorator import check_api
-from friendli.utils.format import secho_error_and_exit
app = typer.Typer(
no_args_is_help=True,
@@ -53,350 +40,3 @@ def list_models():
models = client.model.list()
models_ = [model_dump(model) for model in iter(models)]
table_formatter.render(models_)
-
-
-@app.command()
-def convert(
- model_name_or_path: str = typer.Option(
- ...,
- "--model-name-or-path",
- "-m",
- help="Hugging Face pretrained model name or path to the saved model checkpoint.",
- ),
- output_dir: str = typer.Option(
- ...,
- "--output-dir",
- "-o",
- help=(
- "Directory path to save the converted checkpoint and related configuration "
- "files. Three files will be created in the directory: `model.h5`, "
- "`tokenizer.json`, and `attr.yaml`. "
- "The `model.h5` or `model.safetensors` is the converted checkpoint and can be renamed using "
- "the `--output-model-filename` option. "
- "The `tokenizer.json` is the Friendli-compatible tokenizer file, which should "
- "be uploaded along with the checkpoint file to tokenize the model input "
- "and output. "
- "The `attr.yaml` is the checkpoint attribute file, to be used when uploading "
- "the converted model to Friendli. You can designate the file name using "
- "the `--output-attr-filename` option."
- ),
- ),
- data_type: ModelDataType = typer.Option(
- None, "--data-type", "-dt", help="The data type of converted checkpoint."
- ),
- cache_dir: Optional[str] = typer.Option(
- None, "--cache-dir", help="Directory for downloading checkpoint."
- ),
- dry_run: bool = typer.Option(
- False, "--dry-run", help="Only check conversion avaliability."
- ),
- output_model_file_name: str = typer.Option(
- None,
- "--output-model-filename",
- help="Name of the converted checkpoint file."
- "The default file name is `model.h5` when `--output-ckpt-file-type` is `hdf5` or `model.safetensors` when `--output-ckpt-file-type` is `safetensors`.",
- ),
- output_ckpt_file_type: CheckpointFileType = typer.Option(
- CheckpointFileType.SAFETENSORS,
- "--output-ckpt-file-type",
- help="File format of the converted checkpoint file. The default output ckpt file type is `safetensors`.",
- ),
- output_attr_file_name: str = typer.Option(
- "attr.yaml",
- "--output-attr-filename",
- help="Name of the checkpoint attribute file.",
- ),
- quantize: bool = typer.Option(
- False,
- "--quantize",
- help="Quantize the model before conversion",
- ),
- quant_config_file: Optional[typer.FileText] = typer.Option(
- None,
- "--quant-config-file",
- help="Path to the quantization configuration file.",
- ),
-):
- """Convert huggingface's model checkpoint to Friendli format.
-
- When a checkpoint is in the Hugging Face format, it cannot be directly served.
- It requires conversion to the Friendli format for serving. The conversion
- process involves copying the original checkpoint and transforming it into a
-    checkpoint in the Friendli format (*.h5 or *.safetensors).
-
- :::caution
-    The `friendli model convert` command is available only when the package is installed with
- `pip install "friendli-client[mllib]"`.
- :::
-
- ### Apply quantization
-
-    To quantize the model as part of the conversion, provide the `--quantize` option.
-    You can customize the quantization configuration by describing it in a YAML file
-    and passing the file path to the `--quant-config-file` option. When `--quantize`
-    is used without `--quant-config-file`, the following configuration is used by
-    default.
-
- ```yaml
- # Default quantization configuration
- mode: awq
- device: cuda:0
- seed: 42
- offload: true
- calibration_dataset:
- path_or_name: lambada
- format: json
- split: validation
- lookup_column_name: text
- num_samples: 128
- max_length: 512
- batch_size: 1
- awq_args:
- quant_bit: 4
- quant_group_size: 64
- ```
-
- - **`mode`**: Quantization scheme to apply. Defaults to "awq".
- - **`device`**: Device to run the quantization process. Defaults to "cuda:0".
- - **`seed`**: Random seed. Defaults to 42.
- - **`offload`**: When enabled, this option significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to true.
- - **`calibration_dataset`**
- - **`path_or_name`**: Path or name of the dataset. Datasets from either the Hugging Face Datasets Hub or local file system can be used. Defaults to "lambada".
- - **`format`**: Format of datasets. Defaults to "json".
- - **`split`**: Which split of the data to load. Defaults to "validation".
- - **`lookup_column_name`**: The name of a column in the dataset to be used as calibration inputs. Defaults to "text".
- - **`num_samples`**: The number of dataset samples to use for calibration. Note that the dataset will be shuffled before sampling. Defaults to 512.
-      - **`max_length`**: The maximum length of a calibration input sequence. Defaults to 512.
- - **`batch_size`**: The number of samples to process in a single batch. Defaults to 1.
- - **`awq_args`** (Fill in this field only for "awq" mode)
- - **`quant_bit`** : Bit width of integers to represent weights. Possible values are `4` or `8`. Defaults to 4.
- - **`quant_group_size`**: Group size of quantized matrices. 64 is the only supported value at this time. Defaults to 64.
-
- :::tip
- If you encounter OOM issues when running with AWQ, try enabling the `offload` option.
- :::
-
- :::tip
-    If you set `percentile` to 100 in the quantization config file,
-    the quantization range is determined by the maximum absolute values of the activation tensors.
- :::
-
- :::info
- Currently, [AWQ](https://arxiv.org/abs/2306.00978) is the only supported quantization scheme.
- :::
-
- :::info
-    AWQ is supported only for models whose architecture is one of the following:
-
- - `GPTNeoXForCausalLM`
- - `GPTJForCausalLM`
- - `LlamaForCausalLM`
- - `MPTForCausalLM`
- :::
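-
-    For reference, the same quantized conversion can be driven programmatically through
-    the internal `convert_checkpoint` API that this command wraps (it likewise requires
-    the `mllib` extra). The snippet below is a minimal sketch; the model name and output
-    paths are placeholders to adapt to your environment.
-
-    ```python
-    from friendli.enums import CheckpointFileType
-    from friendli.modules.converter.convert import convert_checkpoint
-    from friendli.modules.quantizer.schema.config import AWQConfig
-
-    convert_checkpoint(
-        model_name_or_path="meta-llama/Llama-2-7b-hf",  # placeholder model
-        output_model_file_name="model.safetensors",
-        output_attr_file_name="attr.yaml",
-        output_dir="./converted",  # placeholder output directory
-        output_ckpt_file_type=CheckpointFileType.SAFETENSORS,
-        quantize=True,
-        quant_config=AWQConfig(),  # the default AWQ configuration shown above
-    )
-    ```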
-
- """
- # pylint: disable=too-many-branches
- try:
- # pylint: disable=import-outside-toplevel
- from friendli.modules.converter.convert import convert_checkpoint
- from friendli.modules.quantizer.schema.config import (
- AWQConfig,
- OneOfQuantConfig,
- QuantConfig,
- )
- from friendli.modules.quantizer_v2.quantize import quantize_checkpoint
- from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig
-
- # pylint: enable=import-outside-toplevel
- except ModuleNotFoundError as exc:
- secho_error_and_exit(str(exc))
-
- if not os.path.isdir(output_dir):
- if os.path.exists(output_dir):
- secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.")
- os.mkdir(output_dir)
-
- quant_config: Optional[OneOfQuantConfig] = None
- use_quantizer_v2 = False
- if quantize:
- if quant_config_file:
- try:
- quant_config_dict = cast(dict, yaml.safe_load(quant_config_file.read()))
- except yaml.YAMLError as err:
- secho_error_and_exit(f"Failed to load the quant config file: {err}")
- if quant_config_dict["mode"] == "int8":
- quant_config = model_parse( # type: ignore
- Int8QuantConfig, quant_config_dict
- )
- else:
- quant_config = model_parse(
- QuantConfig, {"config": quant_config_dict}
- ).config
-
-        # TODO(SA): All quantization modes will be migrated to V2. Remove this after the migration.
- else:
- quant_config = AWQConfig()
-
- if isinstance(quant_config, Int8QuantConfig):
- use_quantizer_v2 = True
-
- default_names = {
- CheckpointFileType.HDF5: "model.h5",
- CheckpointFileType.SAFETENSORS: "model.safetensors",
- }
- output_model_file_name = (
- output_model_file_name or default_names[output_ckpt_file_type]
- )
-
- if use_quantizer_v2:
- if output_ckpt_file_type == CheckpointFileType.HDF5:
- secho_error_and_exit(
- f"int8 quantization only supports `safetensors` output_ckpt_file_type. Current output_ckpt_file_type: {output_ckpt_file_type}"
- )
- try:
- assert isinstance(quant_config, Int8QuantConfig)
- quantize_checkpoint(
- model_name_or_path=model_name_or_path,
- output_dir=output_dir,
- cache_dir=cache_dir,
- dry_run=dry_run,
- quant_config=quant_config,
- )
- except (NotFoundError, QuantizationError, NotSupportedQuantConfigError) as exc:
- secho_error_and_exit(str(exc))
- else:
- try:
- convert_checkpoint(
- model_name_or_path=model_name_or_path,
- output_model_file_name=output_model_file_name,
- output_ckpt_file_type=output_ckpt_file_type,
- output_attr_file_name=output_attr_file_name,
- output_dir=output_dir,
- data_type=data_type,
- cache_dir=cache_dir,
- dry_run=dry_run,
- quantize=quantize,
- quant_config=quant_config,
- )
- except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc:
- secho_error_and_exit(str(exc))
-
- msg = (
- f"Checkpoint({model_name_or_path}) can be converted."
- if dry_run
- else f"Checkpoint({model_name_or_path}) has been converted successfully."
- )
- typer.secho(msg)
-
-
-@app.command()
-def convert_adapter(
- adapter_name_or_path: str = typer.Option(
- ...,
- "--adapter-name-or-path",
- "-a",
- help="Hugging Face pretrained adapter name or path to the saved adapter checkpoint.",
- ),
- output_dir: str = typer.Option(
- ...,
- "--output-dir",
- "-o",
- help=(
- "Directory path to save the converted adapter checkpoint and related configuration "
- "files. Two files will be created in the directory: `adapter.h5`, "
- "and `attr.yaml`. "
- "The `adapter.h5` is the converted checkpoint and can be renamed using "
- "the `--output-adapter-filename` option. "
- "The `attr.yaml` is the adapter checkpoint attribute file, to be used when uploading "
- "the converted model to Friendli. You can designate the file name using "
- "the `--output-attr-filename` option."
- ),
- ),
- data_type: ModelDataType = typer.Option(
- None, "--data-type", "-dt", help="The data type of converted checkpoint."
- ),
- base_model_name_or_path: Optional[str] = typer.Option(
- None,
- "--base-model-name-or-path",
- "-b",
- help=(
- "Hugging Face model name or path to the saved backbone checkpoint. "
- "By default, we use the `base_model_name_or_path` in adapter_config.json."
- ),
- ),
- cache_dir: Optional[str] = typer.Option(
- None, "--cache-dir", help="Directory for downloading checkpoint."
- ),
- dry_run: bool = typer.Option(
- False, "--dry-run", help="Only check conversion avaliability."
- ),
- output_adapter_filename: str = typer.Option(
- "adapter.h5",
- "--output-adapter-filename",
- help="Name of the converted adapter checkpoint file.",
- ),
- output_attr_filename: str = typer.Option(
- "adapter_attr.yaml",
- "--output-attr-filename",
- help="Name of the adapter checkpoint attribute file.",
- ),
-) -> None:
- """Convert huggingface's adapter checkpoint to Friendli format.
-
- When an adapter checkpoint is in the Hugging Face PEFT format, it cannot
- be directly served in Friendli. It requires conversion to the Friendli format.
- The conversion process involves copying the original adapter checkpoint and
- transforming it into a checkpoint in the Friendli format (*.h5).
-
-    This command does not include the behavior of `friendli model convert`; i.e.,
-    `friendli model convert-adapter` converts only the adapter's parameters, not the backbone's.
-
- :::caution
-    The `friendli model convert-adapter` command is available only when the package is installed with
- `pip install "friendli-client[mllib]"`.
- :::
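-
-    For reference, the adapter conversion can also be performed programmatically through
-    the internal `convert_adapter_checkpoint` API that this command wraps. The snippet
-    below is a minimal sketch; the adapter name and output paths are placeholders.
-
-    ```python
-    from friendli.enums import CheckpointFileType
-    from friendli.modules.converter.convert import convert_adapter_checkpoint
-
-    convert_adapter_checkpoint(
-        adapter_name_or_path="my-org/my-lora-adapter",  # placeholder adapter
-        output_attr_filename="adapter_attr.yaml",
-        output_dir="./converted-adapter",  # placeholder output directory
-        output_adapter_filename="adapter.h5",
-        base_model_name_or_path=None,  # fall back to adapter_config.json
-        data_type=None,  # fall back to the base model's torch_dtype
-        output_adapter_file_type=CheckpointFileType.HDF5,
-        cache_dir=None,
-    )
-    ```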
-
- """
- try:
- from friendli.modules.converter.convert import ( # pylint: disable=import-outside-toplevel
- convert_adapter_checkpoint,
- )
- except ModuleNotFoundError as exc:
- secho_error_and_exit(str(exc))
-
- if not os.path.isdir(output_dir):
- if os.path.exists(output_dir):
- secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.")
- os.mkdir(output_dir)
-
-    # The engine cannot load a safetensors LoRA checkpoint yet.
- output_adapter_file_type = CheckpointFileType.HDF5
- default_names = {
- CheckpointFileType.HDF5: "adapter.h5",
- CheckpointFileType.SAFETENSORS: "adapter.safetensors",
- }
- output_adapter_filename = (
- output_adapter_filename or default_names[output_adapter_file_type]
- )
-
- try:
- convert_adapter_checkpoint(
- adapter_name_or_path=adapter_name_or_path,
- output_attr_filename=output_attr_filename,
- output_dir=output_dir,
- output_adapter_filename=output_adapter_filename,
- base_model_name_or_path=base_model_name_or_path,
- data_type=data_type,
- output_adapter_file_type=output_adapter_file_type,
- cache_dir=cache_dir,
- dry_run=dry_run,
- )
- except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc:
- secho_error_and_exit(str(exc))
-
- msg = (
- f"Checkpoint({adapter_name_or_path}) can be converted."
- if dry_run
- else f"Checkpoint({adapter_name_or_path}) has been converted successfully."
- )
- typer.secho(msg)
diff --git a/friendli/modules/__init__.py b/friendli/modules/__init__.py
deleted file mode 100644
index e603ace1..00000000
--- a/friendli/modules/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli modules."""
diff --git a/friendli/modules/converter/__init__.py b/friendli/modules/converter/__init__.py
deleted file mode 100644
index d0213cf4..00000000
--- a/friendli/modules/converter/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli model converter."""
diff --git a/friendli/modules/converter/base.py b/friendli/modules/converter/base.py
deleted file mode 100644
index 9eaca2ec..00000000
--- a/friendli/modules/converter/base.py
+++ /dev/null
@@ -1,560 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Converter."""
-
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from collections.abc import Generator
-from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union, cast
-
-import numpy as np
-import torch
-from peft import PeftType # type: ignore[import] # pylint: disable=import-error
-from peft.config import PeftConfig
-from peft.tuners.lora import ( # type: ignore[import] # pylint: disable=import-error
- LoraConfig,
-)
-from transformers import GenerationConfig, PretrainedConfig # type: ignore[import]
-
-from friendli.enums import ModelDataType
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.interface import (
- DecoderTFBlockConversionInterface,
- EncoderTFBlockConversionInterface,
- ModelConversionInterface,
- NonTFBlockConversionInterface,
-)
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import get_model_data_type
-
-SUPPORTED_GELU_FAMILY = [
- "gelu",
- "gelu_fast",
- "gelu_new",
- "gelu_python",
- "gelu_pytorch_tanh",
- "gelu_accurate",
-]
-SUPPORTED_HEAD_SIZE = [64, 80, 96, 128, 256]
-
-MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP = {
- "gptj": {"q_proj", "k_proj", "v_proj", "out_proj", "fc_in", "fc_out", "wte"},
- "llama": {
- "q_proj",
- "k_proj",
- "v_proj",
- "o_proj",
- "gate_proj",
- "up_proj",
- "down_proj",
- },
- "mistral": {
- "q_proj",
- "k_proj",
- "v_proj",
- "o_proj",
- "gate_proj",
- "up_proj",
- "down_proj",
- },
- "mpt": {"Wqkv", "out_proj", "up_proj", "down_proj"},
-}
-# TODO: remove this const map when engine supports lm head LoRA
-MODEL_TYPE_TO_UNSUPPORTED_LORA_TARGET_MODULES_MAP = {
- "gptj": {"lm_head"},
- "llama": {"lm_head"},
- "mistral": {"lm_head"},
- "mpt": {"lm_head"},
-}
-
-ENCODER_PREFIX = "encoder"
-DECODER_PREFIX = "decoder"
-
-
-class AbstractConverter(ModelConversionInterface, ABC):
- """Abstract class for converting Hugging Face checkpoint to Friendli checkpoint.
-
- Attributes:
- config (PreTrainedConfig): Hugging Face model configuration.
-        generation_config (Optional[GenerationConfig]): Hugging Face generation config.
- When set to None, `config` is used for configuring generation.
-        data_type (Optional[ModelDataType]): Data type for the Friendli checkpoint.
-
- """
-
- def __init__(
- self,
- config: PretrainedConfig,
- generation_config: Optional[GenerationConfig],
- data_type: Optional[ModelDataType],
- ) -> None:
- """Initialize converter."""
- self.config = config
- self.generation_config = generation_config
- self.data_type = (
- data_type if data_type else get_model_data_type(config.torch_dtype)
- )
-
- def get_eos_token_id(self) -> Optional[int]:
- """Get ID of EOS token."""
- generation_eos_token_id = None
- if self.generation_config is not None:
- generation_eos_token_id = self.generation_config.eos_token_id
-
- config_eos_token_id = self.config.eos_token_id
-
- if generation_eos_token_id is None:
- eos_token_id = config_eos_token_id
- else:
- if generation_eos_token_id != config_eos_token_id:
- logger.warn(
- "'eos_token' is different in generation_config (%s) and config (%s). "
- "Please fill the correct value.",
- generation_eos_token_id,
- config_eos_token_id,
- )
- eos_token_id = None
- else:
- eos_token_id = config_eos_token_id
-
- if eos_token_id is None:
- logger.warn(
- "'eos_token' cannot be automatically configured. "
- "Please fill in the field by yourself."
- )
-
- return eos_token_id
-
- def token_embed_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape embedding layer's weight to Friendli format.
-
- Args:
-            params (List[torch.Tensor]): Embedding weight tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped embedding weight.
-
- """
- assert len(params) == 1
- return params[0]
-
- def pos_embed_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape position embedding layer's weight to Friendli format.
-
- Args:
-            params (List[torch.Tensor]): Position embedding weight tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped position embedding weight.
- """
- assert len(params) == 1
- return params[0]
-
- def head_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape head layer's weight to Friendli format.
-
- Args:
-            params (List[torch.Tensor]): Head weight tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped head weight.
-
- """
- assert len(params) == 1
- return params[0]
-
- def linear_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape linear layer's weight to Friendli format.
-
- Args:
-            params (List[torch.Tensor]): Linear weight tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped linear weight.
-
- """
- assert len(params) == 1
- param = params[0].transpose(0, 1)
- return param
-
- def linear_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape linear layer's bias to Friendli format.
-
- Args:
-            params (List[torch.Tensor]): Linear bias tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped linear bias.
-
- """
- assert len(params) == 1
- return params[0]
-
- def ln_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape layer norm layer's weight to Friendli format.
-
- Args:
-            params (List[torch.Tensor]): Layer norm weight tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped layer norm weight.
-
- """
- assert len(params) == 1
- return params[0]
-
- def ln_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape layer norm layer's bias to Friendli format.
-
- Args:
-            params (List[torch.Tensor]): Layer norm bias tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped layer norm bias.
-
- """
- assert len(params) == 1
- return params[0]
-
- def qkv_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape qkv layer's weight to Friendli format.
-
-        In the original checkpoint, the qkv weight is stored either as a single tensor
-        or as three separate tensors. In the Friendli checkpoint, it is stored as a single tensor.
-
- Args:
-            params (List[torch.Tensor]): Query, key, and value weight tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped qkv weight.
-
- """
- param = torch.cat(params, dim=0)
- param = param.transpose(0, 1)
- return param
-
- def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape qkv layer's bias to Friendli format.
-
-        In the original checkpoint, the qkv bias is stored either as a single tensor
-        or as three separate tensors. In the Friendli checkpoint, it is stored as a single tensor.
-
- Args:
-            params (List[torch.Tensor]): Query, key, and value bias tensors from the original checkpoint.
-
- Returns:
- The tensor of reshaped qkv bias.
-
- """
- param = torch.cat(params, dim=0)
- return param
-
-
-class DecoderOnlyConverter(
- AbstractConverter,
- NonTFBlockConversionInterface,
- DecoderTFBlockConversionInterface,
-):
- """Converter for Decoder-Only models."""
-
- def check_config(self) -> None:
- """Check if a convertible form of the checkpoint from the decoder-only model config."""
- super().check_config()
- if self.decoder_head_size not in SUPPORTED_HEAD_SIZE:
- raise NotSupportedCheckpointError(
- invalid_option=f"decoder_head_size={self.decoder_head_size}",
- valid_options=SUPPORTED_HEAD_SIZE,
- )
-
- def get_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Get List of conversion informations for Decoder-Only model."""
- return self.non_transformer_convert_info_list + self.decoder_convert_info_list
-
-
-class EncoderDecoderConverter(
- AbstractConverter,
- NonTFBlockConversionInterface,
- EncoderTFBlockConversionInterface,
- DecoderTFBlockConversionInterface,
-):
- """Converter for Encoder-Decoder models."""
-
- def check_config(self) -> None:
- """Check if a convertible form of the checkpoint from the encoder-decoder model config."""
- if self.decoder_head_size not in SUPPORTED_HEAD_SIZE:
- raise NotSupportedCheckpointError(
- invalid_option=f"decoder_head_size={self.decoder_head_size}",
- valid_options=SUPPORTED_HEAD_SIZE,
- )
-
- def get_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Get list of conversion informations for Encoder-Decoder model."""
- return (
- self.non_transformer_convert_info_list
- + self.decoder_convert_info_list
- + self.encoder_convert_info_list
- )
-
- def get_decoder_start_token_id(self) -> Optional[int]:
- """Get ID of decoder start token."""
- generation_decoder_start_token_id = None
- if self.generation_config is not None:
- generation_decoder_start_token_id = (
- self.generation_config.decoder_start_token_id
- )
-
- config_decoder_start_token_id = self.config.decoder_start_token_id
-
- if generation_decoder_start_token_id is None:
- decoder_start_token_id = config_decoder_start_token_id
- else:
- if generation_decoder_start_token_id != config_decoder_start_token_id:
- logger.warn(
- "'decoder_start_token_id' is different in generation_config "
- "(%s) and config (%s). Please fill the correct value.",
- generation_decoder_start_token_id,
- config_decoder_start_token_id,
- )
- decoder_start_token_id = None
- else:
- decoder_start_token_id = config_decoder_start_token_id
-
- if decoder_start_token_id is None:
- logger.warn(
- "'decoder_start_token' cannot be automatically configured. "
- "Please fill in the field by yourself."
- )
-
- return decoder_start_token_id
-
-
-class DecoderOnlyLoraConverter(AbstractConverter):
- """Converter for LoRA modules in the models."""
-
- def __init__(
- self,
- converter: AbstractConverter,
- adapter_config: PeftConfig,
- ) -> None:
- """Initialize LoRA Converter."""
- super().__init__(
- config=converter.config,
- generation_config=converter.generation_config,
- data_type=converter.data_type,
- )
- self.converter = cast(DecoderOnlyConverter, converter)
- self.adapter_config = cast(LoraConfig, adapter_config)
-
- def check_config(self) -> None:
- """Check if a convertible form of the checkpoint from the LoRAconfig."""
- if self.adapter_config.peft_type != PeftType.LORA:
- raise NotSupportedCheckpointError(
- invalid_option=f"peft_type={self.adapter_config.peft_type}",
- valid_options=[str(PeftType.LORA)],
- )
- if (
- self.config.model_type
- not in MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP
- ):
- raise NotSupportedCheckpointError(
- invalid_option=f"model_type={self.config.model_type} for LORA",
- valid_options=list(
- MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP.keys()
- ),
- )
- if (
- self.adapter_config.layers_pattern is not None
- and len(self.adapter_config.layers_pattern) > 0
- ):
- raise NotSupportedCheckpointError(
- invalid_option=f"layers_pattern={self.adapter_config.layers_pattern}",
- valid_options=[None, [], ""],
- )
- if (
- self.adapter_config.rank_pattern is not None
- and len(self.adapter_config.rank_pattern) > 0
- ):
- raise NotSupportedCheckpointError(
- invalid_option=f"rank_pattern={self.adapter_config.rank_pattern}",
- valid_options=[None, {}],
- )
- if (
- self.adapter_config.alpha_pattern is not None
- and len(self.adapter_config.alpha_pattern) > 0
- ):
- raise NotSupportedCheckpointError(
- invalid_option=f"alpha_pattern={self.adapter_config.alpha_pattern}",
- valid_options=[None, {}],
- )
-
- if self.adapter_config.target_modules is not None:
- for target_module in self.adapter_config.target_modules:
- if (
- target_module
- not in MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP[
- self.config.model_type
- ]
- ):
- if (
- target_module
- in MODEL_TYPE_TO_UNSUPPORTED_LORA_TARGET_MODULES_MAP[
- self.config.model_type
- ]
- ):
- raise NotSupportedCheckpointError(
- invalid_option=f"target_module={target_module}",
- valid_options=list(
- MODEL_TYPE_TO_SUPPORTED_LORA_TARGET_MODULES_MAP[
- self.config.model_type
- ]
- ),
- )
-
- logger.warn(
- "Target module %s does not exist in the base model (%s). Will be ignored.",
- target_module,
- self.adapter_config.base_model_name_or_path,
- )
-
- if (self.adapter_config.layers_to_transform is not None) and (
-            self.adapter_config.layers_to_transform != list(range(self.converter.decoder_layer_num))
- ):
- raise NotSupportedCheckpointError(
- invalid_option=f"layers_to_transform={self.adapter_config.layers_to_transform}",
- valid_options=[
- f"layers_to_transform=None"
- f"layers_to_transform={list(range(self.converter.decoder_layer_num))}",
- ],
- )
-
- def get_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Get convert dict for LoRA model."""
- return self.adapter_convert_info_list
-
- def _get_layers_to_transform(self) -> List[int]:
- layers_to_transform = cast(LoraConfig, self.adapter_config).layers_to_transform
- if layers_to_transform is None:
- layers_to_transform = list(range(self.converter.decoder_layer_num))
- else:
- if isinstance(layers_to_transform, int):
- layers_to_transform = [layers_to_transform]
- return layers_to_transform
-
- def lora_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """Reshape LoRA layer's weight to Friendli format."""
- assert len(params) == 1
- return params[0].transpose(0, 1)
-
- def pre_convert(self, model: torch.nn.Module) -> torch.nn.Module:
- """Preprocess the adapter modules before converting.
-
-        All the parameters of the LoRA low-rank matrices are converted by `lora_weight_reshape`.
-        Override this method if a parameter needs additional preprocessing before `lora_weight_reshape` can handle it.
-
- """
- return model
-
- def convert( # pylint: disable=too-many-locals
- self,
- model: torch.nn.Module,
- convert_info_list: List[ConvertInfo],
- save_numpy_format: bool = True,
- ) -> Generator[Tuple[str, Union[np.ndarray, torch.Tensor]], None, None]:
- """Reshape Lora adapter model's all layer to Friendli format."""
- model = self.pre_convert(model)
- yield from self.converter.convert(model, convert_info_list, save_numpy_format)
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get adapter checkpoint attributes."""
- return {
- "name": "FILL ME",
- "type": "lora",
- "alpha": self.adapter_config.lora_alpha,
- "rank": self.adapter_config.r,
- "target-modules": list(self.adapter_target_modules),
- "ckpt-path": "FILL ME",
- }
-
- @property
- def adapter_target_modules(self) -> Set[str]:
- """Return the target modules that LoRA applies to."""
- if isinstance(self.adapter_config.target_modules, str):
- hf_target_modules = {self.adapter_config.target_modules}
- elif isinstance(self.adapter_config.target_modules, Iterable):
- hf_target_modules = set(self.adapter_config.target_modules)
- else:
- raise CheckpointConversionError("`target_modules` should not be None")
-
- translated_target_modules = set()
- for target in hf_target_modules:
- if target in self.adapter_target_module_map:
- translated_target_modules.add(self.adapter_target_module_map[target])
-
- return translated_target_modules
-
- @property
- @abstractmethod
- def adapter_target_module_map(self) -> Dict[str, str]:
- """Return the dictionary that maps Hugging Face's module name to Friendli's module name."""
-
- @property
- @abstractmethod
- def adapter_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for LoRA modules of the model."""
-
-
-OneOfAdapterConverter = DecoderOnlyLoraConverter
-OneOfConverter = Union[EncoderDecoderConverter, DecoderOnlyConverter]
-
-
-class FP8OnlyConverter(DecoderOnlyConverter):
- """FP8Only Architectures Converter Class."""
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- raise NotImplementedError("Not supported in FP8 Conversion.")
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks."""
- raise NotImplementedError("Not supported in FP8 Conversion.")
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks."""
- raise NotImplementedError("Not supported in FP8 Conversion.")
diff --git a/friendli/modules/converter/convert.py b/friendli/modules/converter/convert.py
deleted file mode 100644
index 4e4338e0..00000000
--- a/friendli/modules/converter/convert.py
+++ /dev/null
@@ -1,254 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Converter."""
-
-from __future__ import annotations
-
-import os
-from typing import Optional
-
-import yaml
-from peft import PeftModel # type: ignore[import] # pylint: disable=import-error
-
-from friendli.enums import CheckpointFileType, ModelDataType, QuantMode
-from friendli.errors import TokenizerNotFoundError
-from friendli.logging import logger
-from friendli.modules.converter.saver import get_saver
-from friendli.utils.validate import validate_convert_imports
-
-validate_convert_imports()
-# pylint: disable=import-outside-toplevel, wrong-import-position, wrong-import-order, ungrouped-imports
-import torch # type: ignore[import]
-from accelerate import init_empty_weights # type: ignore[import]
-
-from friendli.modules.converter.maps import (
- get_adapter_converter_factory,
- get_hf_converter_factory,
-)
-from friendli.modules.converter.utils import (
- get_adapter_config,
- get_model_arch,
- get_model_generation_config,
- get_model_pretrained_config,
- get_torch_data_type,
- save_tokenizer,
-)
-from friendli.modules.quantizer.maps import get_quantized_converter
-from friendli.modules.quantizer.schema.config import OneOfQuantConfig
-
-# pylint: enable=import-outside-toplevel, wrong-import-position, wrong-import-order, ungrouped-imports
-
-
-def convert_checkpoint( # pylint: disable=too-many-branches
- model_name_or_path: str,
- output_model_file_name: str,
- output_attr_file_name: str,
- output_dir: str,
- output_ckpt_file_type: CheckpointFileType,
- *,
- data_type: Optional[ModelDataType] = None,
- cache_dir: Optional[str] = None,
- dry_run: bool = False,
- quantize: bool = False,
- quant_config: Optional[OneOfQuantConfig] = None,
-) -> None:
- """Convert HuggingFace model checkpoint to Friendli format.
-
- Args:
- model_name_or_path (str): Hugging Face model name or local path to the checkpoint.
- output_model_file_name (str): File name of converted checkpoint to save.
- output_attr_file_name (str): File name of the attribute YAML file for
- the converted checkpoint.
- output_dir (str) : Directory path to save the converted checkpoint and the attribute YAML,
- and tokenizer configuration file.
- output_ckpt_file_type (CheckpointFileType): The file type of converted checkpoint.
- data_type (Optional[ModelDataType]): Converted checkpoint data type.
-            Defaults to torch_dtype in 'config.json'.
-        cache_dir (Optional[str], optional): Path for downloading checkpoint. Defaults to None.
-        dry_run (bool, optional): Check only if the checkpoint is convertible. Defaults to False.
- quantize (bool, optional): Enable quantization. Defaults to False.
- quant_config (Optional[OneOfQuantConfig], optional): Quantization configuration.
- Defaults to None.
-
- Raises:
-        InvalidConfigError: Raised when data_type is not supported.
- NotFoundError: Raised when `model_name_or_path` or `tokenizer_output_dir` is not found.
- NotSupportedCheckpointError: Raised when model architecture is not supported to convert.
-
- """
- # pylint: disable=too-many-locals
- model_output_path = os.path.join(output_dir, output_model_file_name)
- model_config = get_model_pretrained_config(
- model_name_or_path, model_output_path, cache_dir
- )
- generation_config = get_model_generation_config(model_name_or_path, cache_dir)
-
- model_arch = get_model_arch(model_config)
- hf_factory, converter_factory = get_hf_converter_factory(model_arch)
- converter = converter_factory(
- config=model_config,
- generation_config=generation_config,
- data_type=data_type,
- )
-
- if quantize:
- assert quant_config is not None
-        # Common quantization only supports the `.safetensors` output format.
- if quant_config.mode == QuantMode.FP8:
- assert output_ckpt_file_type == CheckpointFileType.SAFETENSORS
- converter = get_quantized_converter( # type: ignore[assignment]
- quant_config, converter
- )
-
- converter.check_config()
-
- if not dry_run:
- logger.info(
- "Start loading Hugging Face checkpoint(%s) for conversion...",
- model_name_or_path,
- )
- model = hf_factory.from_pretrained(
- model_name_or_path,
- torch_dtype=model_config.torch_dtype,
- cache_dir=cache_dir,
- trust_remote_code=True,
- low_cpu_mem_usage=True,
-            # `low_cpu_mem_usage` makes model loading faster and uses about 1x the model size of CPU memory.
- # https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel.from_pretrained.example
- )
-
- logger.info(
- "Hugging Face checkpoint(%s) is successfully loaded!",
- model_name_or_path,
- )
-
- convert_info_list = converter.get_convert_info_list()
- with get_saver(
- output_ckpt_file_type, output_dir, output_model_file_name
- ) as saver:
- for name, w in converter.convert(
- model,
- convert_info_list,
- output_ckpt_file_type == CheckpointFileType.HDF5,
- ):
- saver.save_tensor(name, w)
-
- logger.info(
- "Hugging Face checkpoint(%s) is successfully converted to Friendli format!",
- model_name_or_path,
- )
-
- # Save attr.yaml
- attr_output_path = os.path.join(output_dir, output_attr_file_name)
- if quant_config and quant_config.mode == QuantMode.FP8 and ModelDataType.FP8_E4M3:
- model_config.torch_dtype = (
- get_torch_data_type(data_type) if data_type else model_config.torch_dtype
- )
- setattr(model_config, "use_fp8_e4m3", True)
- model_config.to_json_file(os.path.join(output_dir, "config.json"))
- else:
- attr = converter.get_attributes()
- with open(attr_output_path, "w", encoding="utf-8") as file:
- yaml.dump(attr, file, sort_keys=False)
-
- # Save tokenizer files.
- tokenizer_output_dir = output_dir
- try:
- saved_tokenizer_file_paths = save_tokenizer(
- model_name_or_path=model_name_or_path,
- cache_dir=cache_dir,
- save_dir=tokenizer_output_dir,
- )
- except TokenizerNotFoundError as exc:
- logger.warn(str(exc))
-
- if not (
- quant_config and quant_config.mode == QuantMode.FP8 and ModelDataType.FP8_E4M3
- ):
- for path in saved_tokenizer_file_paths:
- if "tokenizer.json" not in path:
- try:
- os.remove(path)
- except FileNotFoundError:
- logger.warn(
- "Tried to delete unnecessary tokenizer file %s but the file "
- "is not found.",
- path,
- )
-
-
-def convert_adapter_checkpoint( # pylint: disable=too-many-locals, too-many-arguments
- adapter_name_or_path: str,
- output_attr_filename: str,
- output_dir: str,
- output_adapter_filename: str,
- base_model_name_or_path: Optional[str],
- data_type: Optional[ModelDataType],
- output_adapter_file_type: CheckpointFileType,
- cache_dir: Optional[str],
- dry_run: bool = False,
-) -> None:
- """Convert HuggingFace model checkpoint to Friendli format."""
- adapter_attr_output_path = os.path.join(output_dir, output_attr_filename)
- adapter_config = get_adapter_config(adapter_name_or_path, cache_dir)
- base_model_name_or_path = (
- base_model_name_or_path or adapter_config.base_model_name_or_path
- )
- model_config = get_model_pretrained_config(
- base_model_name_or_path,
- adapter_attr_output_path,
- cache_dir,
- )
- model_arch = get_model_arch(model_config)
- hf_factory, converter_factory = get_hf_converter_factory(model_arch)
- converter = converter_factory(
- config=model_config,
- generation_config=None,
- data_type=data_type,
- )
- adapter_converter = get_adapter_converter_factory(model_arch)(
- converter, adapter_config
- )
- adapter_converter.check_config()
-
- if not dry_run:
- logger.info(
- "Start loading Hugging Face adapter checkpoint(%s's %s) for conversion...",
- base_model_name_or_path,
- adapter_name_or_path,
- )
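-        # `init_empty_weights()` from accelerate creates the base model on the meta
-        # device, so no memory is allocated for the backbone weights themselves.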
- with init_empty_weights():
- model = hf_factory.from_pretrained(
- base_model_name_or_path,
- torch_dtype=torch.float32,
- cache_dir=cache_dir,
- trust_remote_code=True,
- low_cpu_mem_usage=True,
- )
- # inplace model update
- PeftModel.from_pretrained(
- model, adapter_name_or_path, cache_dir=cache_dir, torch_dtype=torch.float32
- )
- logger.info(
- "Hugging Face adapter checkpoint (%s) is successfully loaded!",
- adapter_name_or_path,
- )
- convert_dict = adapter_converter.get_convert_info_list()
- with get_saver(
- output_adapter_file_type, output_dir, output_adapter_filename
- ) as saver:
- for name, w in adapter_converter.convert(
- model, convert_dict, output_adapter_file_type == CheckpointFileType.HDF5
- ):
- saver.save_tensor(name, w)
-
- logger.info(
- "Hugging Face checkpoint (%s) is successfully converted to Friendli format!",
- adapter_name_or_path,
- )
-
- attr = adapter_converter.get_attributes()
- with open(adapter_attr_output_path, "w", encoding="utf-8") as file:
- yaml.dump([attr], file, sort_keys=False)
diff --git a/friendli/modules/converter/interface.py b/friendli/modules/converter/interface.py
deleted file mode 100644
index 6e7db352..00000000
--- a/friendli/modules/converter/interface.py
+++ /dev/null
@@ -1,196 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Converter Interface."""
-
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from collections.abc import Generator
-from typing import Any, Dict, List, Tuple, Union
-
-import numpy as np
-import torch
-from tqdm import tqdm
-
-from friendli.enums import ModelDataType
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import (
- convert_tensor_dtype,
- get_tensor_from_state_dict,
-)
-
-
-class ModelConversionInterface(ABC):
- """Interface get information for converting models."""
-
- @abstractmethod
- def get_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Get list of conversion informations for the model."""
-
- @abstractmethod
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
-
- @abstractmethod
- def check_config(self) -> None:
- """Check if the model is convertable."""
-
- def convert(
- self,
- model: torch.nn.Module,
- convert_info_list: List[ConvertInfo],
- save_numpy_format: bool = True,
- ) -> Generator[Tuple[str, Union[np.ndarray, torch.Tensor]], None, None]:
- """Convert Huggingface Model to Friendli format(.h5).
-
- Args:
- model (torch.nn.Module): Huggingface model.
- output_path (str): Path to save the converted checkpoint.
- convert_info_list (List[ConvertInfo]):
- List of convert information of the parameter in huggingface checkpoint.
- save_numpy_format (bool, optional): Save the converted tensor in numpy format.
- Defaults to True.
- """
- state_dict = model.state_dict()
- total_layers = len(convert_info_list)
- with tqdm(total=total_layers, desc="Converting", unit="tensor") as pbar:
- for convert_info in convert_info_list:
- converted_name, reshape_fn, param_names, data_type = (
- convert_info.converted_name,
- convert_info.reshape_fn,
- convert_info.param_names,
- convert_info.data_type,
- )
- params = [
- get_tensor_from_state_dict(state_dict, param_name)
- for param_name in param_names
- ]
- reshaped_tensor = convert_tensor_dtype(reshape_fn(params), data_type)
- if save_numpy_format:
- yield (
- converted_name,
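-                        # NumPy has no bfloat16 dtype; for BF16 tensors, reinterpret the
-                        # raw 16-bit pattern (via a float16 view) as uint16 before saving.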
- reshaped_tensor.view(torch.float16).numpy().view(np.uint16)
- if data_type == ModelDataType.BF16
- else reshaped_tensor.numpy(),
- )
- else:
- yield (
- converted_name,
- reshaped_tensor.contiguous(),
- )
-
- pbar.update()
-
-
-class NonTFBlockConversionInterface(ABC):
- """Interface get information for converting common layers."""
-
- @property
- @abstractmethod
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for the non-transformer blocks."""
-
-
-class DecoderTFBlockConversionInterface(ABC):
- """Interface get information for converting decoder layers."""
-
- @property
- @abstractmethod
- def decoder_layer_prefix(self) -> str:
- """Return the layer name prefix used before the decoder's transformer block number."""
-
- @property
- @abstractmethod
- def decoder_layer_num(self) -> int:
- """Return the number of transformer blocks in the decoder."""
-
- @property
- @abstractmethod
- def decoder_hidden_size(self) -> int:
- """Return the hidden size of the decoder."""
-
- @property
- @abstractmethod
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads."""
-
- @property
- @abstractmethod
- def decoder_num_attention_heads(self) -> int:
- """Return the number of attention heads in the decoder."""
-
- @property
- @abstractmethod
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for transformer blocks in the decoder."""
-
- @property
- @abstractmethod
- def decoder_head_size(self) -> int:
- """Return the head size of the decoder."""
-
- @property
- @abstractmethod
- def decoder_ff_intermediate_size(self) -> int:
- """Return the intermediate size of the linear layer in decoder's MLP."""
-
-
-class EncoderTFBlockConversionInterface(ABC):
- """Interface get information for converting encoder layers."""
-
- @property
- @abstractmethod
- def encoder_layer_prefix(self) -> str:
- """Return the layer name prefix used before the encoder's transformer block number."""
-
- @property
- @abstractmethod
- def encoder_layer_num(self) -> int:
- """Return the number of transformer blocks in the encoder."""
-
- @property
- @abstractmethod
- def encoder_hidden_size(self) -> int:
- """Return the hidden size of the encoder."""
-
- @property
- @abstractmethod
- def encoder_num_attention_heads(self) -> int:
- """Return the number of attention heads in the encoder."""
-
- @property
- @abstractmethod
- def encoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for transformer blocks in the encoder."""
-
- @property
- @abstractmethod
- def encoder_head_size(self) -> int:
- """Return the head size of the encoder."""
-
- @property
- @abstractmethod
- def encoder_ff_intermediate_size(self) -> int:
- """Return the intermediate size of the linear layer in encoder's MLP."""
-
-
-class RotaryEmbeddingConversionInterface(ABC):
- """Interface get information for converting rotary embeddings."""
-
- @property
- @abstractmethod
- def rotary_dim(self) -> int:
- """Return the dimension of rotary embeddings."""
-
- @property
- @abstractmethod
- def rotary_emb_base(self) -> float:
- """Return the base of rotary embeddings."""
diff --git a/friendli/modules/converter/maps.py b/friendli/modules/converter/maps.py
deleted file mode 100644
index 7a8bcd37..00000000
--- a/friendli/modules/converter/maps.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Defining Friendli Model Converter maps."""
-
-from __future__ import annotations
-
-from typing import Dict, Tuple, Type, Union
-
-from transformers import ( # type: ignore[import]
- AutoModelForCausalLM,
- BlenderbotForConditionalGeneration,
- BloomForCausalLM,
- CodeGenForCausalLM,
- CohereForCausalLM,
- DbrxForCausalLM,
- FalconForCausalLM,
- GPT2LMHeadModel,
- GPTJForCausalLM,
- GPTNeoXForCausalLM,
- LlamaForCausalLM,
- MistralForCausalLM,
- MixtralForCausalLM,
- MptForCausalLM,
- OPTForCausalLM,
- Phi3ForCausalLM,
- PreTrainedModel,
- T5ForConditionalGeneration,
-)
-
-from friendli.errors import NotSupportedCheckpointError
-from friendli.modules.converter.base import OneOfAdapterConverter, OneOfConverter
-from friendli.modules.converter.models.arctic import ArcticForCausalLMConverter
-from friendli.modules.converter.models.blenderbot import BlenderbotConverter
-from friendli.modules.converter.models.bloom import BloomForCausalLMConverter
-from friendli.modules.converter.models.codegen import CodegenForCausalLMConverter
-from friendli.modules.converter.models.cohere import CohereForCausalLMConverter
-from friendli.modules.converter.models.dbrx import DbrxForCausalLMConverter
-from friendli.modules.converter.models.falcon import FalconForCausalLMConverter
-from friendli.modules.converter.models.gpt2 import GPT2LMHeadModelConverter
-from friendli.modules.converter.models.gpt_neox import GPTNeoXForCausalLMConverter
-from friendli.modules.converter.models.gptj import (
- GPTJForCausalLMConverter,
- GPTJForCausalLMLoraConverter,
-)
-from friendli.modules.converter.models.llama import (
- LlamaForCausalLMConverter,
- LlamaForCausalLMLoraConverter,
-)
-from friendli.modules.converter.models.mistral import (
- MistralForCausalLMConverter,
- MistralForCausalLMLoraConverter,
-)
-from friendli.modules.converter.models.mixtral import MixtralForCausalLMConverter
-from friendli.modules.converter.models.mpt import (
- MPTForCausalLMConverter,
- MptForCausalLMLoraConverter,
-)
-from friendli.modules.converter.models.opt import OPTForCausalLMConverter
-from friendli.modules.converter.models.phi3 import Phi3ForCausalLMConverter
-from friendli.modules.converter.models.phi_msft import PhiForCausalLMConverter
-from friendli.modules.converter.models.t5 import T5Converter
-
-MODEL_ARCH_CONVERTER_MAP: Dict[
-    str, Tuple[PreTrainedModel, Type[OneOfConverter]]
-] = {
- "BlenderbotForConditionalGeneration": (
- BlenderbotForConditionalGeneration,
- BlenderbotConverter,
- ),
- "BloomForCausalLM": (BloomForCausalLM, BloomForCausalLMConverter),
- "CodeGenForCausalLM": (CodeGenForCausalLM, CodegenForCausalLMConverter),
- "FalconForCausalLM": (FalconForCausalLM, FalconForCausalLMConverter),
- "GPTNeoXForCausalLM": (GPTNeoXForCausalLM, GPTNeoXForCausalLMConverter),
- "GPT2LMHeadModel": (GPT2LMHeadModel, GPT2LMHeadModelConverter),
- "GPTJForCausalLM": (GPTJForCausalLM, GPTJForCausalLMConverter),
- "LlamaForCausalLM": (LlamaForCausalLM, LlamaForCausalLMConverter),
- "LLaMAForCausalLM": (LlamaForCausalLM, LlamaForCausalLMConverter),
- "MistralForCausalLM": (MistralForCausalLM, MistralForCausalLMConverter),
- "MixtralForCausalLM": (MixtralForCausalLM, MixtralForCausalLMConverter),
- "MPTForCausalLM": (MptForCausalLM, MPTForCausalLMConverter),
- "OPTForCausalLM": (OPTForCausalLM, OPTForCausalLMConverter),
- "T5ForConditionalGeneration": (T5ForConditionalGeneration, T5Converter),
- "PhiForCausalLM": (AutoModelForCausalLM, PhiForCausalLMConverter),
- "CohereForCausalLM": (CohereForCausalLM, CohereForCausalLMConverter),
- "DbrxForCausalLM": (DbrxForCausalLM, DbrxForCausalLMConverter),
- "Phi3ForCausalLM": (Phi3ForCausalLM, Phi3ForCausalLMConverter),
- "ArcticForCausalLM": (AutoModelForCausalLM, ArcticForCausalLMConverter),
-}
-
-MODEL_ARCH_ADAPTER_CONVERTER_MAP: Dict[
- str,
- Type[OneOfAdapterConverter],
-] = {
- "GPTJForCausalLM": GPTJForCausalLMLoraConverter,
- "LlamaForCausalLM": LlamaForCausalLMLoraConverter,
- "LLaMAForCausalLM": LlamaForCausalLMLoraConverter,
- "MPTForCausalLM": MptForCausalLMLoraConverter,
- "MistralForCausalLM": MistralForCausalLMLoraConverter,
-}
-
-
-def get_hf_converter_factory(
- model_arch: str,
-) -> Tuple[PreTrainedModel, Type[OneOfConverter]]:
- """Return the converter factory for the given model architecture.
-
- Args:
- model_arch (str): Model architecture name.
-
- Returns:
-        Tuple[PreTrainedModel, Type[OneOfConverter]]: Tuple of
- model class and converter class.
-
- Raises:
- NotSupportedCheckpointError: Raised when the given model architecture is not supported.
-
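-    Example:
-        A minimal illustration, assuming the checkpoint architecture is Llama:
-
-        >>> hf_cls, converter_cls = get_hf_converter_factory("LlamaForCausalLM")
-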
- """
- if model_arch not in MODEL_ARCH_CONVERTER_MAP:
- raise NotSupportedCheckpointError(
- invalid_option=f"Model architecture='{model_arch}'",
- valid_options=list(MODEL_ARCH_CONVERTER_MAP.keys()),
- )
-
- return MODEL_ARCH_CONVERTER_MAP[model_arch]
-
-
-def get_adapter_converter_factory(
- model_arch: str,
-) -> Type[OneOfAdapterConverter]:
- """Return the converter factory for the given model architecture.
-
- Args:
- model_arch (str): Model architecture name.
-
- Returns:
-        Type[OneOfAdapterConverter]: Adapter converter class.
-
- Raises:
- NotSupportedCheckpointError: Raised when the given model architecture is not supported.
- """
- try:
- adapter_converter_type = MODEL_ARCH_ADAPTER_CONVERTER_MAP[model_arch]
- except KeyError as exc:
- raise NotSupportedCheckpointError(
- invalid_option=f"adapter for model architecture='{model_arch}'",
- valid_options=list(MODEL_ARCH_ADAPTER_CONVERTER_MAP.keys()),
- ) from exc
- return adapter_converter_type
diff --git a/friendli/modules/converter/models/arctic.py b/friendli/modules/converter/models/arctic.py
deleted file mode 100644
index 293d21d9..00000000
--- a/friendli/modules/converter/models/arctic.py
+++ /dev/null
@@ -1,254 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Arctic Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-from typing import cast
-
-from transformers import PretrainedConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import FP8OnlyConverter
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-
-
-class ArcticConfig(PretrainedConfig):
- r"""
- This is the configuration class to store the configuration of a [`ArcticModel`]. It is used to instantiate an
- Arctic model according to the specified arguments, defining the model architecture. Instantiating a configuration
- with the defaults will yield a similar configuration to that of the #TODO(rsamdani): add what model has the default config..
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
- documentation from [`PretrainedConfig`] for more information.
- Args:
- vocab_size (`int`, *optional*, defaults to 32000):
- Vocabulary size of the Arctic model. Defines the number of different tokens that can be represented by the
- `inputs_ids` passed when calling [`ArcticModel`]
- hidden_size (`int`, *optional*, defaults to 4096):
- Dimension of the hidden representations.
- intermediate_size (`int`, *optional*, defaults to 14336):
- Dimension of the MLP representations.
- num_hidden_layers (`int`, *optional*, defaults to 32):
- Number of hidden layers in the Transformer encoder.
- num_attention_heads (`int`, *optional*, defaults to 32):
- Number of attention heads for each attention layer in the Transformer encoder.
- num_key_value_heads (`int`, *optional*, defaults to 8):
- This is the number of key_value heads that should be used to implement Grouped Query Attention. If
- `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
- `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
- converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
- by meanpooling all the original heads within that group. For more details checkout [this
- paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`.
- hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
- The non-linear activation function (function or string) in the decoder.
- max_position_embeddings (`int`, *optional*, defaults to `4096*32`):
- The maximum sequence length that this model might ever be used with. Arctic's sliding window attention
- allows sequence of up to 4096*32 tokens.
- initializer_range (`float`, *optional*, defaults to 0.02):
- The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
- rms_norm_eps (`float`, *optional*, defaults to 1e-05):
- The epsilon used by the rms normalization layers.
- use_cache (`bool`, *optional*, defaults to `True`):
- Whether or not the model should return the last key/values attentions (not used by all models). Only
- relevant if `config.is_decoder=True`.
- pad_token_id (`int`, *optional*):
- The id of the padding token.
- bos_token_id (`int`, *optional*, defaults to 1):
- The id of the "beginning-of-sequence" token.
- eos_token_id (`int`, *optional*, defaults to 2):
- The id of the "end-of-sequence" token.
- tie_word_embeddings (`bool`, *optional*, defaults to `False`):
- Whether the model's input and output word embeddings should be tied.
- rope_theta (`float`, *optional*, defaults to 1000000.0):
- The base period of the RoPE embeddings.
- sliding_window (`int`, *optional*):
- Sliding window attention window size. If not specified, will default to `4096`.
- attention_dropout (`float`, *optional*, defaults to 0.0):
- The dropout ratio for the attention probabilities.
- num_experts_per_tok (`int`, *optional*, defaults to 2):
- The number of experts to root per-token, can be also interpreted as the `top-p` routing
- parameter
- num_local_experts (`int`, *optional*, defaults to 8):
- Number of experts per Sparse MLP layer.
- router_aux_loss_coef (`float`, *optional*, defaults to 0.001):
- The aux loss factor for the total loss.
- ```python
- >>> from transformers import ArcticModel, ArcticConfig
- >>> # Initializing a Arctic 7B style configuration TODO(rsamdani): verify which model does the default configuration correspond to.
- >>> configuration = ArcticConfig()
- >>> # Initializing a model from the Arctic 7B style configuration
- >>> model = ArcticModel(configuration)
- >>> # Accessing the model configuration
- >>> configuration = model.config
- ```"""
-
- model_type = "arctic"
- keys_to_ignore_at_inference = ["past_key_values"]
-
- def __init__(
- self,
- vocab_size=32000,
- hidden_size=4096,
- intermediate_size=14336,
- num_hidden_layers=32,
- num_attention_heads=32,
- num_key_value_heads=None,
- hidden_act="silu",
- max_position_embeddings=4096,
- initializer_range=0.02,
- rms_norm_eps=1e-5,
- use_cache=True,
- pad_token_id=None,
- bos_token_id=1,
- eos_token_id=2,
- tie_word_embeddings=False,
- rope_theta=1e6,
- sliding_window=None,
- attention_dropout=0.0,
- num_experts_per_tok=1,
- num_local_experts=8,
- router_aux_loss_coef=0.001,
- moe_layer_frequency=2,
- parallel_attn_mlp_res=False,
- moe_train_capacity_factor=1,
- moe_eval_capacity_factor=1,
- enable_expert_tensor_parallelism=False,
- moe_min_capacity=0,
- moe_token_dropping=True,
- **kwargs,
- ):
- self.vocab_size = vocab_size
- self.max_position_embeddings = max_position_embeddings
- self.hidden_size = hidden_size
- self.intermediate_size = intermediate_size
- self.num_hidden_layers = num_hidden_layers
- self.num_attention_heads = num_attention_heads
- self.sliding_window = sliding_window
-
- # for backward compatibility
- if num_key_value_heads is None:
- num_key_value_heads = num_attention_heads
-
- self.num_key_value_heads = num_key_value_heads
- self.hidden_act = hidden_act
- self.initializer_range = initializer_range
- self.rms_norm_eps = rms_norm_eps
- self.use_cache = use_cache
- self.rope_theta = rope_theta
- self.attention_dropout = attention_dropout
-
- self.num_experts_per_tok = num_experts_per_tok
- self.num_local_experts = num_local_experts
- self.router_aux_loss_coef = router_aux_loss_coef
- self.moe_layer_frequency = moe_layer_frequency
- self.moe_train_capacity_factor = moe_train_capacity_factor
- self.moe_eval_capacity_factor = moe_eval_capacity_factor
- self.enable_expert_tensor_parallelism = enable_expert_tensor_parallelism
- self.moe_min_capacity = moe_min_capacity
- self.moe_token_dropping = moe_token_dropping
- self.parallel_attn_mlp_res = parallel_attn_mlp_res
- super().__init__(
- pad_token_id=pad_token_id,
- bos_token_id=bos_token_id,
- eos_token_id=eos_token_id,
- tie_word_embeddings=tie_word_embeddings,
- **kwargs,
- )
-
-
-class ArcticForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface):
- """ArcticForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if Arctic architectures' config can be converted to Friendli format."""
- super().check_config()
- config = cast(ArcticConfig, self.config)
- try:
- if config.tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if config.hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={config.hidden_act}'",
- valid_options=["silu"],
- )
- if config.moe_layer_frequency != 1:
- raise NotSupportedCheckpointError(
- invalid_option=f"'moe_layer_frequency={config.moe_layer_frequency}'",
- valid_options=[1],
- )
- if not config.parallel_attn_mlp_res:
- raise NotSupportedCheckpointError(
- invalid_option=f"'parallel_attn_mlp_res={config.parallel_attn_mlp_res}'",
- valid_options=[True],
- )
-
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "arctic"
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before Arctic's transformer block number."""
- return "model.layers."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in Arctic."""
- return cast(ArcticConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in Arctic."""
- return cast(ArcticConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in Arctic."""
- return cast(ArcticConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in Arctic."""
- config = cast(ArcticConfig, self.config)
- if config.num_key_value_heads is None:
- return self.decoder_num_attention_heads
- return config.num_key_value_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of Arctic."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of the linear layer in Arctic MLP."""
- return cast(ArcticConfig, self.config).intermediate_size
-
- @property
- def rotary_dim(self) -> int:
- """The rotary embedding dimension of Arctic."""
- return self.decoder_head_size
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary embedding base of Arctic."""
- return cast(ArcticConfig, self.config).rope_theta
-
- @property
- def num_experts(self) -> int:
- """The number of moe experts per transformer block in Arctic."""
- return cast(ArcticConfig, self.config).num_local_experts
-
- @property
- def num_selected_moe_experts(self) -> int:
- """The number of selected moe experts per transformer block in Arctic."""
- return cast(ArcticConfig, self.config).num_experts_per_tok
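
The `ArcticConfig` docstring above notes that a multi-head checkpoint can be turned into a GQA checkpoint by mean-pooling the original key/value heads within each group. The following is a minimal sketch of that idea, not part of the converter; the function name, shapes, and sizes are illustrative assumptions:

```python
import torch


def meanpool_kv_heads(
    kv_weight: torch.Tensor, num_heads: int, num_kv_heads: int
) -> torch.Tensor:
    """Mean-pool the K (or V) projection rows of an MHA checkpoint into GQA groups.

    kv_weight has shape [num_heads * head_size, hidden_size]; the result has
    shape [num_kv_heads * head_size, hidden_size].
    """
    assert num_heads % num_kv_heads == 0
    head_size = kv_weight.size(0) // num_heads
    group_size = num_heads // num_kv_heads
    grouped = kv_weight.view(num_kv_heads, group_size, head_size, kv_weight.size(1))
    return grouped.mean(dim=1).reshape(num_kv_heads * head_size, kv_weight.size(1))


# 32 query heads pooled down to 8 KV heads (head_size=128, hidden_size=4096).
pooled = meanpool_kv_heads(torch.randn(32 * 128, 4096), num_heads=32, num_kv_heads=8)
print(pooled.shape)  # torch.Size([1024, 4096])
```
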
diff --git a/friendli/modules/converter/models/blenderbot.py b/friendli/modules/converter/models/blenderbot.py
deleted file mode 100644
index 224ded48..00000000
--- a/friendli/modules/converter/models/blenderbot.py
+++ /dev/null
@@ -1,472 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Blenderbot Checkpoint Converter."""
-
-from __future__ import annotations
-
-import math
-from typing import Any, Dict, List, cast
-
-import numpy as np
-import torch
-from transformers import BlenderbotConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- ENCODER_PREFIX,
- SUPPORTED_GELU_FAMILY,
- EncoderDecoderConverter,
-)
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class BlenderbotConverter(EncoderDecoderConverter):
- """BlenderbotForConditionalGeneration Architectures Converter Class."""
-
-    def check_config(self) -> None:
-        """Check if Blenderbot architectures' config can be converted to Friendli format."""
- super().check_config()
- config = cast(BlenderbotConfig, self.config)
- try:
- if config.activation_function not in SUPPORTED_GELU_FAMILY:
- raise NotSupportedCheckpointError(
- invalid_option="'activation_function="
- f"{cast(BlenderbotConfig, self.config).activation_function}'",
- valid_options=SUPPORTED_GELU_FAMILY,
- )
- if not config.tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=False'",
- valid_options=[True],
- )
- if self.encoder_num_attention_heads != self.decoder_num_attention_heads:
- raise NotSupportedCheckpointError(
- invalid_option=(
- f"encoder_num_attention_heads={self.encoder_num_attention_heads} "
- f"decoder_num_attention_heads={self.decoder_num_attention_heads}"
- ),
- valid_options=[
- "encoder_num_attention_heads == decoder_num_attention_heads"
- ],
- )
- if config.decoder_ffn_dim != config.encoder_ffn_dim:
- raise NotSupportedCheckpointError(
- invalid_option=(
- f"encoder_ffn_dim={config.encoder_ffn_dim} "
- f"decoder_ffn_dim={config.decoder_ffn_dim}"
- ),
- valid_options=["encoder_ffn_dim == decoder_ffn_dim"],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def token_embed_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """Reshape token embedding weight for Blenderbot's embedding layer."""
- assert len(params) == 1
- embed_dim = cast(BlenderbotConfig, self.config).d_model
- embed_scale = (
- math.sqrt(embed_dim)
- if cast(BlenderbotConfig, self.config).scale_embedding
- else 1.0
- )
- embed_weight = params[0]
- embed_weight = embed_weight * embed_scale
- return embed_weight
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(BlenderbotConfig, self.config)
-
- logger.warn(
- "Since Blenderbot uses absolute position embedding, 'max_input_length' and "
- "'max_output_length' cannot be larger than %d.",
- config.max_position_embeddings,
- )
-
- eos_token_id = self.get_eos_token_id()
- decoder_start_token_id = self.get_decoder_start_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.encoder_head_size,
- "num_heads": self.encoder_num_attention_heads,
- "hidden_size": self.encoder_hidden_size,
- "ff_intermediate_size": self.decoder_ff_intermediate_size,
- "num_encoder_layers": self.encoder_layer_num,
- "num_decoder_layers": self.decoder_layer_num,
- "max_input_length": config.max_position_embeddings,
- "max_output_length": config.max_position_embeddings,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "decoder_start_token": (
- decoder_start_token_id
- if decoder_start_token_id is not None
- else "FILL ME"
- ),
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "blenderbot"
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in Blenderbot."""
- return [
- ConvertInfo(
- param_names=["model.shared.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.shared.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.encoder.embed_positions.weight"],
- data_type=self.data_type,
- converted_name=f"{ENCODER_PREFIX}/wpe/weight:0",
- reshape_fn=self.pos_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.decoder.embed_positions.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/wpe/weight:0",
- reshape_fn=self.pos_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.encoder.layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{ENCODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.encoder.layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{ENCODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["model.decoder.layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.decoder.layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ]
-
- @property
- def encoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in Blenderbot's encoder."""
- convert_info_list = []
- for i in range(self.encoder_layer_num):
- layer_prefix = f"{self.encoder_layer_prefix}{i}."
- converted_prefix = f"{ENCODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.weight",
- f"{layer_prefix}self_attn.k_proj.weight",
- f"{layer_prefix}self_attn.v_proj.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.bias",
- f"{layer_prefix}self_attn.k_proj.bias",
- f"{layer_prefix}self_attn.v_proj.bias",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/bias:0",
- reshape_fn=self.qkv_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.out_proj.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}final_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}final_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc1.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc2.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc2.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in Blenderbot's decoder."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.weight",
- f"{layer_prefix}self_attn.k_proj.weight",
- f"{layer_prefix}self_attn.v_proj.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.bias",
- f"{layer_prefix}self_attn.k_proj.bias",
- f"{layer_prefix}self_attn.v_proj.bias",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/bias:0",
- reshape_fn=self.qkv_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.out_proj.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}encoder_attn_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}encoder_attn_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}final_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_3/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}final_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_3/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}encoder_attn.q_proj.weight",
- f"{layer_prefix}encoder_attn.k_proj.weight",
- f"{layer_prefix}encoder_attn.v_proj.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}cross_attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}encoder_attn.q_proj.bias",
- f"{layer_prefix}encoder_attn.k_proj.bias",
- f"{layer_prefix}encoder_attn.v_proj.bias",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}cross_attn/c_attn/bias:0",
- reshape_fn=self.qkv_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}encoder_attn.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}cross_attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}encoder_attn.out_proj.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}cross_attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc1.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc2.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc2.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def encoder_layer_prefix(self) -> str:
- """The layer name prefix used before Blenderbot encoder's transformer block number."""
- return "model.encoder.layers."
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before Blenderbot decoder's transformer block number."""
- return "model.decoder.layers."
-
- @property
- def encoder_layer_num(self) -> int:
- """The number of transformer blocks in Blenderbot encoder."""
- return cast(BlenderbotConfig, self.config).encoder_layers
-
- @property
- def encoder_hidden_size(self) -> int:
- """The hidden size of Blenderbot encoder."""
- return cast(BlenderbotConfig, self.config).d_model
-
- @property
- def encoder_num_attention_heads(self) -> int:
- """The number of attention heads of Blenderbot encoder."""
- return cast(BlenderbotConfig, self.config).encoder_attention_heads
-
- @property
- def encoder_head_size(self) -> int:
- """The size of each attention head of Blenderbot encoder."""
- return self.encoder_hidden_size // self.encoder_num_attention_heads
-
- @property
-    def encoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in Blenderbot encoder's MLP."""
- return cast(BlenderbotConfig, self.config).encoder_ffn_dim
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of transformer blocks in Blenderbot decoder."""
- return cast(BlenderbotConfig, self.config).decoder_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size of Blenderbot decoder."""
- return cast(BlenderbotConfig, self.config).d_model
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads of Blenderbot decoder."""
- return cast(BlenderbotConfig, self.config).decoder_attention_heads
-
- @property
-    def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads of Blenderbot decoder."""
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The size of each attention head of Blenderbot decoder."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
-    def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in Blenderbot decoder's MLP."""
- return cast(BlenderbotConfig, self.config).decoder_ffn_dim
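
The `token_embed_weight_reshape` removed above only rescales the shared embedding table by `sqrt(d_model)` when `scale_embedding` is enabled. A standalone sketch of the same transformation follows; the example sizes are illustrative assumptions:

```python
import math

import torch


def scale_token_embedding(
    embed_weight: torch.Tensor, d_model: int, scale_embedding: bool
) -> torch.Tensor:
    """Scale a [vocab_size, d_model] embedding table the way Blenderbot applies it."""
    embed_scale = math.sqrt(d_model) if scale_embedding else 1.0
    return embed_weight * embed_scale


embeddings = torch.randn(8008, 2560)  # illustrative vocab_size / d_model
print(scale_token_embedding(embeddings, d_model=2560, scale_embedding=True).shape)
# torch.Size([8008, 2560])
```
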
diff --git a/friendli/modules/converter/models/bloom.py b/friendli/modules/converter/models/bloom.py
deleted file mode 100644
index 7ce615ad..00000000
--- a/friendli/modules/converter/models/bloom.py
+++ /dev/null
@@ -1,277 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Bloom Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import numpy as np
-import torch
-from transformers import BloomConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import DECODER_PREFIX, DecoderOnlyConverter
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class BloomForCausalLMConverter(DecoderOnlyConverter):
- """BloomForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if Bloom architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(BloomConfig, self.config).apply_residual_connection_post_layernorm:
- raise NotSupportedCheckpointError(
- invalid_option="apply_residual_connection_post_layernorm=True",
- valid_options=[False],
- )
- if cast(BloomConfig, self.config).slow_but_exact:
- raise NotSupportedCheckpointError(
- invalid_option="slow_but_exact=True", valid_options=[False]
- )
- if not cast(BloomConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="tie_word_embeddings=False", valid_options=[True]
- )
- if cast(BloomConfig, self.config).layer_norm_epsilon != 1e-5:
- raise NotSupportedCheckpointError(
- invalid_option="layer_norm_epsilon="
- f"{cast(BloomConfig, self.config).layer_norm_epsilon}",
- valid_options=[1e-5],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def qkv_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """qkv_weight_reshape for Bloom's attention layer."""
- assert len(params) == 1
- qkv_weight = params[0]
- split_qkv_weight_list = torch.split(qkv_weight, self.decoder_head_size, dim=0)
- qkv_weight_list = [
- torch.cat(
- [
- split_qkv_weight_list[j * 3 + i]
- for j in range(self.decoder_num_attention_heads)
- ],
- dim=0,
- ).reshape(-1, self.decoder_hidden_size)
- for i in range(3)
- ]
-
- qkv_weight = torch.cat(qkv_weight_list, dim=0).transpose(0, 1)
- return qkv_weight
-
- def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """qkv_bias_reshape for Bloom's attention layer."""
- assert len(params) == 1
- qkv_bias = params[0]
- split_qkv_bias_list = torch.split(qkv_bias, self.decoder_head_size, dim=0)
- qkv_bias_list = [
- torch.cat(
- [
- split_qkv_bias_list[j * 3 + i]
- for j in range(self.decoder_num_attention_heads)
- ],
- dim=0,
- )
- for i in range(3)
- ]
-
- qkv_bias = torch.cat(qkv_bias_list, dim=0)
- return qkv_bias
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(BloomConfig, self.config)
-
- logger.warn(
- "The 'max_length' field is left blank as it cannot be automatically configured. "
- "You must determine the 'max_length' according to your needs. The Bloom model does "
- "not rely on absolute position embeddings, allowing you to choose any "
- "suitable value."
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "num_heads": self.decoder_num_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": "FILL ME",
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "bloom"
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in Bloom."""
- return [
- ConvertInfo(
- param_names=["transformer.word_embeddings.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.word_embeddings_layernorm.weight"],
- data_type=self.data_type,
- converted_name="wte/ln/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.word_embeddings_layernorm.bias"],
- data_type=self.data_type,
- converted_name="wte/ln/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ]
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in Bloom."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attention.query_key_value.bias"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/bias:0",
- reshape_fn=self.qkv_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attention.dense.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}post_attention_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}post_attention_layernorm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_h_to_4h.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_4h_to_h.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attention.query_key_value.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attention.dense.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_h_to_4h.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_4h_to_h.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before Bloom's transformer block number."""
- return "transformer.h."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in Bloom."""
- return cast(BloomConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """Return the hidden size in Bloom."""
- return cast(BloomConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in Bloom."""
- return cast(BloomConfig, self.config).num_attention_heads
-
- @property
-    def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads in Bloom."""
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The size of each attention head in Bloom."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of the linear layer in Bloom MLP."""
- return self.decoder_hidden_size * 4
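
Bloom's fused `query_key_value` weight interleaves Q, K, and V per attention head, which is why the `qkv_weight_reshape` removed above splits the rows by head size and regroups them into contiguous Q/K/V blocks before transposing. A self-contained sketch of the same regrouping; the shapes are illustrative assumptions:

```python
import torch


def regroup_bloom_qkv(
    qkv_weight: torch.Tensor, num_heads: int, head_size: int
) -> torch.Tensor:
    """[num_heads * 3 * head_size, hidden] interleaved per head -> [hidden, 3 * num_heads * head_size]."""
    chunks = torch.split(qkv_weight, head_size, dim=0)  # q0, k0, v0, q1, k1, v1, ...
    grouped = [
        torch.cat([chunks[head * 3 + proj] for head in range(num_heads)], dim=0)
        for proj in range(3)  # gather all Q rows, then all K rows, then all V rows
    ]
    return torch.cat(grouped, dim=0).transpose(0, 1)


out = regroup_bloom_qkv(torch.randn(4 * 3 * 16, 64), num_heads=4, head_size=16)
print(out.shape)  # torch.Size([64, 192])
```
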
diff --git a/friendli/modules/converter/models/codegen.py b/friendli/modules/converter/models/codegen.py
deleted file mode 100644
index a6f1ef03..00000000
--- a/friendli/modules/converter/models/codegen.py
+++ /dev/null
@@ -1,252 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli CodeGen Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import CodeGenConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- SUPPORTED_GELU_FAMILY,
- DecoderOnlyConverter,
-)
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class CodegenForCausalLMConverter(
- DecoderOnlyConverter, RotaryEmbeddingConversionInterface
-):
- """CodegenForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if CodeGen architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if (
- cast(CodeGenConfig, self.config).activation_function
- not in SUPPORTED_GELU_FAMILY
- ):
- raise NotSupportedCheckpointError(
- invalid_option="'activation_function="
- f"{cast(CodeGenConfig, self.config).activation_function}'",
- valid_options=SUPPORTED_GELU_FAMILY,
- )
- if cast(CodeGenConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if cast(CodeGenConfig, self.config).layer_norm_epsilon != 1e-5:
- raise NotSupportedCheckpointError(
- invalid_option="'layer_norm_epsilon="
- f"{cast(CodeGenConfig, self.config).layer_norm_epsilon}'",
- valid_options=[1e-5],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def qkv_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """qkv_weight_reshape for CodeGen's attention layer."""
- assert len(params) == 1
- original_qkv_weight = params[0]
- reshaped_qkv_weight = original_qkv_weight.reshape(
- (4, original_qkv_weight.size(0) // 4, original_qkv_weight.size(1))
- )
- q_weight, v_weight, k_weight = torch.split(
- reshaped_qkv_weight, reshaped_qkv_weight.size(1) // 3, dim=1
- )
- q_weight = q_weight.reshape((-1, q_weight.size(2)))
- k_weight = k_weight.reshape((-1, k_weight.size(2)))
- v_weight = v_weight.reshape((-1, v_weight.size(2)))
-
- qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0)
- qkv_weight = qkv_weight.transpose(0, 1)
-
- return qkv_weight
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(CodeGenConfig, self.config)
-
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The CodeGen model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- config.n_positions,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": config.n_positions,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "rope_theta": self.rotary_emb_base,
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "gpt-j"
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in CodeGen."""
- return [
- ConvertInfo(
- param_names=["transformer.wte.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.bias"],
- data_type=self.data_type,
- converted_name="head_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ]
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in CodeGen."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_1.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_in.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_out.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.qkv_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_in.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_out.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before CodeGen's transformer block number."""
- return "transformer.h."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in CodeGen."""
- return cast(CodeGenConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in CodeGen."""
- return cast(CodeGenConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in CodeGen."""
- return cast(CodeGenConfig, self.config).num_attention_heads
-
- @property
-    def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads in CodeGen."""
- return self.decoder_num_attention_heads
-
- @property
-    def decoder_head_size(self) -> int:
-        """The head size of CodeGen."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
-    def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in CodeGen MLP."""
- return self.decoder_hidden_size * 4
-
- @property
- def rotary_dim(self) -> int:
- """The rotary dim in CodeGen."""
- return cast(CodeGenConfig, self.config).rotary_dim
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary emb base in CodeGen."""
- return 10000.0
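
CodeGen's fused `qkv_proj` weight is laid out in four local chunks; the converter removed above splits each chunk into three equal slices (named q, v, k in that order in the code), flattens them, and reassembles a contiguous Q/K/V matrix. A standalone sketch of that reshaping under the same assumptions, with illustrative sizes:

```python
import torch


def reshape_codegen_qkv(qkv_weight: torch.Tensor, mp_num: int = 4) -> torch.Tensor:
    """[3 * hidden, hidden] fused CodeGen weight -> [hidden, 3 * hidden]."""
    hidden_size = qkv_weight.size(1)
    chunked = qkv_weight.reshape(mp_num, qkv_weight.size(0) // mp_num, hidden_size)
    # The converter treats each chunk's three equal slices as (q, v, k).
    q, v, k = torch.split(chunked, chunked.size(1) // 3, dim=1)
    q, k, v = (t.reshape(-1, hidden_size) for t in (q, k, v))
    return torch.cat((q, k, v), dim=0).transpose(0, 1)


out = reshape_codegen_qkv(torch.randn(3 * 256, 256))
print(out.shape)  # torch.Size([256, 768])
```
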
diff --git a/friendli/modules/converter/models/cohere.py b/friendli/modules/converter/models/cohere.py
deleted file mode 100644
index 47217e23..00000000
--- a/friendli/modules/converter/models/cohere.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Cohere Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-from typing import cast
-
-from transformers import CohereConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.modules.converter.base import FP8OnlyConverter
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class CohereForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface):
- """CohereForCausalLM Architectures Converter Class."""
-
-    def check_config(self) -> None:
-        """Check if Cohere architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(CohereConfig, self.config).hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(CohereConfig, self.config).hidden_act}'",
- valid_options=["silu"],
- )
- if not cast(CohereConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=False'",
- valid_options=[True],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "cohere"
-
- @property
-    def decoder_layer_prefix(self) -> str:
-        """The layer name prefix used before Cohere's transformer block number."""
- return "model.layers."
-
- @property
-    def decoder_layer_num(self) -> int:
-        """The number of decoder layers in Cohere."""
- return cast(CohereConfig, self.config).num_hidden_layers
-
- @property
-    def decoder_hidden_size(self) -> int:
-        """The hidden size in Cohere."""
- return cast(CohereConfig, self.config).hidden_size
-
- @property
-    def decoder_num_attention_heads(self) -> int:
-        """The number of attention heads in Cohere."""
- return cast(CohereConfig, self.config).num_attention_heads
-
- @property
-    def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads in Cohere."""
- config = cast(CohereConfig, self.config)
- if config.num_key_value_heads is None:
- return self.decoder_num_attention_heads
- return config.num_key_value_heads
-
- @property
-    def decoder_head_size(self) -> int:
-        """The head size of Cohere."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
-    def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in Cohere MLP."""
- return self.config.intermediate_size
-
- @property
-    def rotary_dim(self) -> int:
-        """The rotary embedding dimension of Cohere."""
- return self.decoder_head_size
-
- @property
-    def rotary_emb_base(self) -> float:
-        """The rotary embedding base of Cohere."""
- return cast(CohereConfig, self.config).rope_theta
diff --git a/friendli/modules/converter/models/dbrx.py b/friendli/modules/converter/models/dbrx.py
deleted file mode 100644
index 88c9094f..00000000
--- a/friendli/modules/converter/models/dbrx.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Dbrx Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-from typing import cast
-
-from transformers.models.dbrx.configuration_dbrx import ( # type: ignore[import]
- DbrxConfig,
- DbrxFFNConfig,
-)
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.modules.converter.base import FP8OnlyConverter
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-
-
-class DbrxForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface):
- """DbrxForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if Dbrx architectures' config can be converted to Friendli format."""
- super().check_config()
- config = cast(DbrxConfig, self.config)
- try:
- if config.tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if config.ffn_config.moe_top_k not in [1, 2, 4]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'moe_top_k={config.ffn_config.moe_top_k}'",
- valid_options=[1, 2, 4],
- )
- if config.ffn_config.moe_num_experts not in [1, 2, 4, 8, 16]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'moe_num_experts={config.ffn_config.moe_num_experts}'",
- valid_options=[1, 2, 4, 8, 16],
- )
-
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "dbrx"
-
- @property
-    def decoder_layer_prefix(self) -> str:
-        """The layer name prefix used before Dbrx's transformer block number."""
- return "transformer.blocks."
-
- @property
-    def decoder_layer_num(self) -> int:
-        """The number of decoder layers in Dbrx."""
- return cast(DbrxConfig, self.config).n_layers
-
- @property
-    def decoder_hidden_size(self) -> int:
-        """The hidden size in Dbrx."""
- return cast(DbrxConfig, self.config).d_model
-
- @property
-    def decoder_num_attention_heads(self) -> int:
-        """The number of attention heads in Dbrx."""
- return cast(DbrxConfig, self.config).n_heads
-
- @property
-    def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads in Dbrx."""
- config = cast(DbrxConfig, self.config)
- if config.attn_config.kv_n_heads is None:
- return self.decoder_num_attention_heads
- return config.attn_config.kv_n_heads
-
- @property
-    def decoder_head_size(self) -> int:
-        """The head size of Dbrx."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
-    def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in Dbrx MLP."""
- return cast(DbrxConfig, self.config).ffn_config.ffn_hidden_size
-
- @property
-    def rotary_dim(self) -> int:
-        """The rotary embedding dimension of Dbrx."""
- return self.decoder_head_size
-
- @property
-    def rotary_emb_base(self) -> float:
-        """The rotary embedding base of Dbrx."""
- return cast(DbrxConfig, self.config).attn_config.rope_theta
diff --git a/friendli/modules/converter/models/falcon.py b/friendli/modules/converter/models/falcon.py
deleted file mode 100644
index 3dfdede2..00000000
--- a/friendli/modules/converter/models/falcon.py
+++ /dev/null
@@ -1,329 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Falcon Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import FalconConfig # type: ignore[import]
-
-from friendli.errors import NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import DECODER_PREFIX, DecoderOnlyConverter
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import convert_to_gpt_j_params
-
-
-class FalconForCausalLMConverter(
- DecoderOnlyConverter, RotaryEmbeddingConversionInterface
-):
- """FalconForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if Falcon architectures' config can be converted to Friendli format."""
- super().check_config()
- config = cast(FalconConfig, self.config)
-
- if config.layer_norm_epsilon != 1e-5:
- raise NotSupportedCheckpointError(
- invalid_option=f"'layer_norm_epsilon={config.layer_norm_epsilon}'",
- valid_options=[1e-5],
- )
-
- if config.alibi:
- raise NotSupportedCheckpointError(
-                invalid_option=f"'alibi={config.alibi}'",
- valid_options=[False],
- )
-
- if not config.rotary:
- raise NotSupportedCheckpointError(
-                invalid_option=f"'rotary={config.rotary}'",
- valid_options=[True],
- )
-
- if config.bias:
- raise NotSupportedCheckpointError(
-                invalid_option=f"'bias={config.bias}'",
- valid_options=[False],
- )
-
- if not config.new_decoder_architecture and not config.parallel_attn:
- raise NotSupportedCheckpointError(
- invalid_option=(
- f"'new_decoder_architecture'={config.new_decoder_architecture}"
- f"'parallel_attn'={config.parallel_attn}"
- ),
- valid_options=[
- "'new_decoder_architecture'=True",
- "'new_decoder_architecture'=False, 'parallel_attn'=True",
- ],
- )
-
- def qkv_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """qkv_weight_reshape for Falcon's attention layer."""
- assert len(params) == 1
- qkv_weight = params[0]
-
- num_queries_per_kv = (
- self.decoder_num_attention_heads // self.decoder_num_kv_attention_heads
- )
-
- qkv_weight = qkv_weight.reshape(
- self.decoder_num_kv_attention_heads,
- num_queries_per_kv + 2,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- q_weight = qkv_weight[:, :num_queries_per_kv].reshape(
- self.decoder_num_kv_attention_heads * num_queries_per_kv,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = qkv_weight[:, [-2]].reshape(
- self.decoder_num_kv_attention_heads,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
- v_weight = qkv_weight[:, [-1]].reshape(
- self.decoder_num_kv_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim)
- k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim)
-
- q_weight = q_weight.reshape(
- self.decoder_num_kv_attention_heads
- * num_queries_per_kv
- * self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = k_weight.reshape(
- self.decoder_num_kv_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0)
- qkv_weight = qkv_weight.transpose(0, 1)
-
- return qkv_weight
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(FalconConfig, self.config)
-
- logger.warn(
- "The 'max_length' field is left blank as it cannot be automatically configured. "
- "You must determine the 'max_length' according to your needs. The Falcon model does "
- "not rely on absolute position embeddings, allowing you to choose any "
- "suitable value."
- )
-
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_kv_heads": self.decoder_num_kv_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": "FILL ME",
- "vocab_size": config.vocab_size,
- "eos_token": self.get_eos_token_id() or "FILL ME",
- "rope_theta": self.rotary_emb_base,
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- if cast(FalconConfig, self.config).new_decoder_architecture:
- return "falcon"
- return "falcon-7b"
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in Falcon."""
- return [
- ConvertInfo(
- param_names=["transformer.word_embeddings.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ]
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in Falcon."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
-
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attention.query_key_value.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attention.dense.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_h_to_4h.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_4h_to_h.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
-
- if cast(FalconConfig, self.config).new_decoder_architecture:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_attn.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_attn.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_mlp.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_mlp.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ]
- )
- else:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
- @property
-    def decoder_layer_prefix(self) -> str:
-        """The layer name prefix used before Falcon's transformer block number."""
- return "transformer.h."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in Falcon."""
- return cast(FalconConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in Falcon."""
- return cast(FalconConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in Falcon."""
- return cast(FalconConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in Falcon."""
- config = cast(FalconConfig, self.config)
-
- if config.new_decoder_architecture:
- if config.num_kv_heads is not None:
- return config.num_kv_heads
- return config.num_attention_heads
-
- if config.multi_query:
- return 1
-
- if config.num_kv_heads is not None:
- return config.num_kv_heads
- return config.num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of Falcon."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
-    def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in Falcon MLP."""
- return self.decoder_hidden_size * 4
-
- @property
-    def rotary_dim(self) -> int:
-        """The rotary embedding dimension of Falcon."""
- return self.decoder_head_size
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary embedding base of Falcon."""
- return cast(FalconConfig, self.config).rope_theta
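
Falcon's fused `query_key_value` weight groups each KV head with its `num_queries_per_kv` query heads followed by one key and one value head; the `qkv_weight_reshape` removed above first separates those blocks and then applies the repo's `convert_to_gpt_j_params` rotary permutation. Below is a sketch of just the separation step; the rotary permutation is omitted and the shapes are illustrative assumptions:

```python
import torch


def split_falcon_qkv(
    qkv_weight: torch.Tensor, num_heads: int, num_kv_heads: int, head_size: int
):
    """Split Falcon's grouped [q...q, k, v] layout into separate Q, K, and V weights."""
    hidden_size = qkv_weight.size(1)
    queries_per_kv = num_heads // num_kv_heads
    grouped = qkv_weight.reshape(num_kv_heads, queries_per_kv + 2, head_size, hidden_size)
    q_weight = grouped[:, :queries_per_kv].reshape(num_heads * head_size, hidden_size)
    k_weight = grouped[:, [-2]].reshape(num_kv_heads * head_size, hidden_size)
    v_weight = grouped[:, [-1]].reshape(num_kv_heads * head_size, hidden_size)
    return q_weight, k_weight, v_weight


q, k, v = split_falcon_qkv(
    torch.randn((32 + 2 * 8) * 64, 1024), num_heads=32, num_kv_heads=8, head_size=64
)
print(q.shape, k.shape, v.shape)
# torch.Size([2048, 1024]) torch.Size([512, 1024]) torch.Size([512, 1024])
```
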
diff --git a/friendli/modules/converter/models/gpt2.py b/friendli/modules/converter/models/gpt2.py
deleted file mode 100644
index d2e2de5d..00000000
--- a/friendli/modules/converter/models/gpt2.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPT2 Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import GPT2Config # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- SUPPORTED_GELU_FAMILY,
- DecoderOnlyConverter,
-)
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class GPT2LMHeadModelConverter(DecoderOnlyConverter):
- """GPT2LMHeadModel Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if GPT2 architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if (
- cast(GPT2Config, self.config).activation_function
- not in SUPPORTED_GELU_FAMILY
- ):
- raise NotSupportedCheckpointError(
- invalid_option="'activation_function="
- f"{cast(GPT2Config, self.config).activation_function}'",
- valid_options=SUPPORTED_GELU_FAMILY,
- )
- if cast(GPT2Config, self.config).scale_attn_by_inverse_layer_idx:
- raise NotSupportedCheckpointError(
- invalid_option="'scale_attn_by_inverse_layer_idx=True'",
- valid_options=[False],
- )
- if not cast(GPT2Config, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=False'",
- valid_options=[True],
- )
- if cast(GPT2Config, self.config).layer_norm_epsilon != 1e-5:
- raise NotSupportedCheckpointError(
- invalid_option="'layer_norm_epsilon="
- f"{cast(GPT2Config, self.config).layer_norm_epsilon}'",
- valid_options=[1e-5],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(GPT2Config, self.config)
-
- logger.warn(
- "Since GPT2 uses absolute position embedding, 'max_length' cannot be "
- "larger than %d.",
- config.n_positions,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "num_heads": self.decoder_num_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": config.n_positions,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "gpt"
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in GPT2."""
- return [
- ConvertInfo(
- param_names=["transformer.wte.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.wpe.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/wpe/weight:0",
- reshape_fn=self.pos_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ]
-
- def linear_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """Reshape linear weight in GPT2, which does not need weight transpose."""
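-        # Hugging Face GPT-2 uses Conv1D modules whose weights are already stored
-        # as (in_features, out_features), so they can be copied without a transpose.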
- assert len(params) == 1
- return params[0]
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for transformer blocks in GPT2."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_1.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.c_attn.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.c_proj.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_2.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_2.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.c_fc.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.c_proj.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.c_attn.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.c_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.c_fc.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.c_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before GPT2's transformer block number."""
- return "transformer.h."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in GPT2."""
- return cast(GPT2Config, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in GPT2."""
- return cast(GPT2Config, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in GPT2."""
- return cast(GPT2Config, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads in GPT2."""
- return self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in GPT2 MLP."""
- return self.decoder_hidden_size * 4
-
- @property
- def decoder_head_size(self) -> int:
-        """The head size of GPT2."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
diff --git a/friendli/modules/converter/models/gpt_neox.py b/friendli/modules/converter/models/gpt_neox.py
deleted file mode 100644
index 47fe88a1..00000000
--- a/friendli/modules/converter/models/gpt_neox.py
+++ /dev/null
@@ -1,328 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPT NeoX Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import GPTNeoXConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- SUPPORTED_GELU_FAMILY,
- DecoderOnlyConverter,
-)
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import convert_to_gpt_j_params
-
-
-class GPTNeoXForCausalLMConverter(
- DecoderOnlyConverter, RotaryEmbeddingConversionInterface
-):
- """GPTNeoXForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if GPTNeoX architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(GPTNeoXConfig, self.config).hidden_act not in SUPPORTED_GELU_FAMILY:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(GPTNeoXConfig, self.config).hidden_act}'",
- valid_options=SUPPORTED_GELU_FAMILY,
- )
- if not cast(GPTNeoXConfig, self.config).use_parallel_residual:
- raise NotSupportedCheckpointError(
- invalid_option="'use_parallel_residual=False'",
- valid_options=[True],
- )
- if cast(GPTNeoXConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if cast(GPTNeoXConfig, self.config).layer_norm_eps != 1e-5:
- raise NotSupportedCheckpointError(
- invalid_option="'layer_norm_eps="
- f"{cast(GPTNeoXConfig, self.config).layer_norm_eps}'",
- valid_options=[1e-5],
- )
- if cast(GPTNeoXConfig, self.config).rotary_emb_base != 10000:
- raise NotSupportedCheckpointError(
- invalid_option=(
- f"'rotary_emb_base={cast(GPTNeoXConfig, self.config).rotary_emb_base}'"
- ),
- valid_options=[10000],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def qkv_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """qkv_weight_reshape for GPTNeoX's attention layer."""
- assert len(params) == 1
- qkv_weight = params[0]
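-        # GPT-NeoX stores Q, K, and V fused per attention head. Split them apart,
-        # reorder the rotary dimensions of Q and K into the GPT-J layout, then
-        # re-concatenate and transpose to (in_features, out_features).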
- qkv_weight = qkv_weight.reshape(
- self.decoder_num_attention_heads,
- 3,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- q_weight = qkv_weight[:, 0].reshape(
- self.decoder_num_attention_heads,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = qkv_weight[:, 1].reshape(
- self.decoder_num_attention_heads,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
- v_weight = qkv_weight[:, 2].reshape(
- self.decoder_num_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- q_weight = convert_to_gpt_j_params(param=q_weight, rotary_dim=self.rotary_dim)
- k_weight = convert_to_gpt_j_params(param=k_weight, rotary_dim=self.rotary_dim)
- q_weight = q_weight.reshape(
- self.decoder_num_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = k_weight.reshape(
- self.decoder_num_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0)
- qkv_weight = qkv_weight.transpose(0, 1)
-
- return qkv_weight
-
- def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """qkv_bias_reshape for GPTNeoX's attention layer."""
- assert len(params) == 1
- qkv_bias = params[0]
- qkv_bias = qkv_bias.reshape(
- self.decoder_num_attention_heads,
- 3,
- self.decoder_head_size,
- )
-
- q_bias = qkv_bias[:, 0].reshape(
- self.decoder_num_attention_heads, self.decoder_head_size
- )
- k_bias = qkv_bias[:, 1].reshape(
- self.decoder_num_attention_heads, self.decoder_head_size
- )
- v_bias = qkv_bias[:, 2].reshape(
- self.decoder_num_attention_heads * self.decoder_head_size
- )
-
- q_bias = convert_to_gpt_j_params(q_bias, self.rotary_dim).flatten()
- k_bias = convert_to_gpt_j_params(k_bias, self.rotary_dim).flatten()
-
- qkv_bias = torch.cat((q_bias, k_bias, v_bias), dim=0)
- return qkv_bias
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(GPTNeoXConfig, self.config)
-
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The GPTNeoX model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- config.max_position_embeddings,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": config.max_position_embeddings,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "rope_theta": self.rotary_emb_base,
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "gpt-neox"
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for non-transformer blocks in GPTNeoX."""
- return [
- ConvertInfo(
- param_names=["gpt_neox.embed_in.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["gpt_neox.final_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["gpt_neox.final_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["embed_out.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ]
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for transformer blocks in GPTNeoX."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attention.query_key_value.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/bias:0",
- reshape_fn=self.qkv_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attention.dense.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}post_attention_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}post_attention_layernorm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_h_to_4h.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_4h_to_h.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attention.query_key_value.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attention.dense.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_h_to_4h.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.dense_4h_to_h.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before GPTNeoX's transformer block number."""
- return "gpt_neox.layers."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in GPTNeoX."""
- return cast(GPTNeoXConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in GPTNeoX."""
- return cast(GPTNeoXConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in GPTNeoX."""
- return cast(GPTNeoXConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads in GPTNeoX."""
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
-        """The head size of GPTNeoX."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in GPTNeoX MLP."""
- return self.decoder_hidden_size * 4
-
- @property
- def rotary_dim(self) -> int:
-        """The rotary embedding dimension of GPTNeoX."""
- return int(self.decoder_head_size * cast(GPTNeoXConfig, self.config).rotary_pct)
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary embedding base of GPTNeoX."""
- return float(cast(GPTNeoXConfig, self.config).rotary_emb_base)
diff --git a/friendli/modules/converter/models/gptj.py b/friendli/modules/converter/models/gptj.py
deleted file mode 100644
index 7e0c464f..00000000
--- a/friendli/modules/converter/models/gptj.py
+++ /dev/null
@@ -1,430 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPTJ Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import GPTJConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- SUPPORTED_GELU_FAMILY,
- DecoderOnlyConverter,
- DecoderOnlyLoraConverter,
-)
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class GPTJForCausalLMLoraConverter(DecoderOnlyLoraConverter):
- """GPTJForCausalLM LoRA Converter Class."""
-
- @property
- def adapter_target_module_map(self) -> Dict[str, str]:
- """Return the dictionary that maps Hugging Face's module name to Friendli's module name."""
- return {
- "q_proj": "query",
- "k_proj": "key",
- "v_proj": "value",
- "out_proj": "attn_fc",
- "fc_in": "ff1",
- "fc_out": "ff2",
- "wte": "wte",
- }
-
- @property
- def adapter_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for LoRA adapter modules in GPTJ."""
- convert_info_list = []
- target_modules = self.adapter_target_modules
-
- # Non-transformer modules
- if "wte" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=["transformer.wte.lora_embedding_A.default"],
- data_type=self.converter.data_type,
- converted_name="wte/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.wte.lora_embedding_B.default"],
- data_type=self.converter.data_type,
- converted_name="wte/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- # Transformer modules
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.converter.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
-
- if "query" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.q_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/query_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.q_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/query_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "key" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.k_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/key_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.k_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/key_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "value" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.v_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/value_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.v_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/value_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "attn_fc" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
-                                f"{layer_prefix}attn.out_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
-                                f"{layer_prefix}attn.out_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "ff1" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.fc_in.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.fc_in.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "ff2" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.fc_out.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.fc_out.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
-
-class GPTJForCausalLMConverter(
- DecoderOnlyConverter, RotaryEmbeddingConversionInterface
-):
- """GPTJForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if GPTJ architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if (
- cast(GPTJConfig, self.config).activation_function
- not in SUPPORTED_GELU_FAMILY
- ):
- raise NotSupportedCheckpointError(
- invalid_option="'activation_function="
- f"{cast(GPTJConfig, self.config).activation_function}'",
- valid_options=SUPPORTED_GELU_FAMILY,
- )
- if cast(GPTJConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if cast(GPTJConfig, self.config).layer_norm_epsilon != 1e-5:
- raise NotSupportedCheckpointError(
- invalid_option="'layer_norm_epsilon="
- f"{cast(GPTJConfig, self.config).layer_norm_epsilon}'",
- valid_options=[1e-5],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def qkv_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """qkv_weight_reshape for GPTJ's attention layer."""
- assert len(params) == 3
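-        # GPT-J keeps separate Q, K, and V projection weights; concatenate them along
-        # the output dimension and transpose to (in_features, out_features). No rotary
-        # reordering is applied since the checkpoint is already in the GPT-J layout.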
- qkv_weight = torch.cat(
- params,
- dim=0,
- )
- qkv_weight = qkv_weight.transpose(0, 1)
- return qkv_weight
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(GPTJConfig, self.config)
-
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The GPTJ model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- config.n_positions,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": config.n_positions,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "rope_theta": self.rotary_emb_base,
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "gpt-j"
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for non-transformer blocks in GPTJ."""
- return [
- ConvertInfo(
- param_names=["transformer.wte.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.ln_f.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.bias"],
- data_type=self.data_type,
- converted_name="head_fc/bias:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ]
-
- @property
- def decoder_convert_info_list(self) -> List[ConvertInfo]:
-        """The list of conversion information for transformer modules in GPTJ."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_1.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_in.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_out.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.q_proj.weight",
- f"{layer_prefix}attn.k_proj.weight",
- f"{layer_prefix}attn.v_proj.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_in.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc_out.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before GPTJ's transformer module number."""
- return "transformer.h."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in GPTJ."""
- return cast(GPTJConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in GPTJ."""
- return cast(GPTJConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in GPTJ."""
- return cast(GPTJConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
-        """The number of key-value attention heads in GPTJ."""
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
-        """The head size of GPTJ."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
-        """The intermediate size of the linear layer in GPTJ MLP."""
- return self.decoder_hidden_size * 4
-
- @property
- def rotary_dim(self) -> int:
- """The rotary dim in GPTJ."""
- return cast(GPTJConfig, self.config).rotary_dim
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary emb base in GPTJ."""
- return 10000.0
diff --git a/friendli/modules/converter/models/llama.py b/friendli/modules/converter/models/llama.py
deleted file mode 100644
index 19381d5a..00000000
--- a/friendli/modules/converter/models/llama.py
+++ /dev/null
@@ -1,494 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli LLaMA Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import LlamaConfig, LlamaForCausalLM # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- DecoderOnlyConverter,
- DecoderOnlyLoraConverter,
-)
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import convert_to_gpt_j_params
-
-
-class LlamaForCausalLMLoraConverter(DecoderOnlyLoraConverter):
- """LlamaForCausalLM LoRA Converter Class."""
-
- def pre_convert(
- self,
- model: torch.nn.Module,
- ) -> torch.nn.Module:
- """Adjust the LoRA Adapter module's params in Llama before converting."""
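-        # The base converter permutes the rotary dimensions of the Q/K projection
-        # weights into the GPT-J layout (see LlamaForCausalLMConverter.qkv_weight_reshape),
-        # so the LoRA B matrices, whose output rows align with those weights, are
-        # permuted the same way here.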
- converter = cast(LlamaForCausalLMConverter, self.converter)
- for layer in cast(LlamaForCausalLM, model).model.layers:
- if "query" in self.adapter_target_modules:
- query_b = layer.self_attn.q_proj.lora_B.default.weight
- query_b = query_b.reshape(
- converter.decoder_num_attention_heads,
- converter.decoder_head_size,
- -1,
- )
- query_b = convert_to_gpt_j_params(query_b, converter.decoder_head_size)
- query_b = query_b.reshape(
- converter.decoder_num_attention_heads * converter.decoder_head_size,
- -1,
- )
- layer.self_attn.q_proj.lora_B.default.weight.data = query_b
-
- if "key" in self.adapter_target_modules:
- key_b = layer.self_attn.k_proj.lora_B.default.weight
- key_b = key_b.reshape(
- converter.decoder_num_kv_attention_heads,
- converter.decoder_head_size,
- -1,
- )
- key_b = convert_to_gpt_j_params(key_b, converter.decoder_head_size)
- key_b = key_b.reshape(
- converter.decoder_num_attention_heads * converter.decoder_head_size,
- -1,
- )
- layer.self_attn.k_proj.lora_B.default.weight.data = key_b
-
- return model
-
- @property
- def adapter_target_module_map(self) -> Dict[str, str]:
- """Return the dictionary that maps Hugging Face's module name to Friendli's module name."""
- return {
- "q_proj": "query",
- "k_proj": "key",
- "v_proj": "value",
- "o_proj": "attn_fc",
- "up_proj": "ff1",
- "gate_proj": "ff_gate",
- "down_proj": "ff2",
- "embed_tokens": "wte",
- }
-
- @property
- def adapter_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for LoRA adapter modules in Llama."""
- convert_info_list = []
- target_modules = self.adapter_target_modules
-
- # Non-transformer modules
- if "wte" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=["model.embed_tokens.lora_embedding_A.default"],
- data_type=self.converter.data_type,
- converted_name="wte/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.embed_tokens.lora_embedding_B.default"],
- data_type=self.converter.data_type,
- converted_name="wte/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- # Transformer modules
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.converter.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- assert self.adapter_config.target_modules is not None
-
- if "query" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/query_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/query_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "key" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.k_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/key_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.k_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/key_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "value" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.v_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/value_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.v_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/value_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "attn_fc" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.o_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.o_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "ff1" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.up_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.up_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "ff_gate" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.gate_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_gate/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.gate_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_gate/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "ff2" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.down_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.down_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
-
-class LlamaForCausalLMConverter(
- DecoderOnlyConverter, RotaryEmbeddingConversionInterface
-):
- """LlamaForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if LLaMA architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(LlamaConfig, self.config).hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(LlamaConfig, self.config).hidden_act}'",
- valid_options=["silu"],
- )
- if cast(LlamaConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if cast(LlamaConfig, self.config).rms_norm_eps not in (1e-5, 1e-6):
- raise NotSupportedCheckpointError(
- invalid_option=f"'rms_norm_eps={cast(LlamaConfig, self.config).rms_norm_eps}'",
- valid_options=[1e-5, 1e-6],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def qkv_weight_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """qkv_weight_reshape for LLaMA's attention layer."""
- assert len(params) == 3
- q_weight = params[0]
- k_weight = params[1]
- v_weight = params[2]
-
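-        # Q uses the full number of attention heads while K (and V) may use fewer
-        # key-value heads (grouped-query attention). Reorder the rotary dimensions of
-        # Q and K into the GPT-J layout before fusing everything into one c_attn weight.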
- q_weight = q_weight.reshape(
- self.decoder_num_attention_heads,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = k_weight.reshape(
- self.decoder_num_kv_attention_heads,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
- q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim)
- k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim)
- q_weight = q_weight.reshape(
- self.decoder_num_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = k_weight.reshape(
- self.decoder_num_kv_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- qkv_weight = torch.cat([q_weight, k_weight, v_weight], dim=0)
- qkv_weight = qkv_weight.transpose(0, -1)
- return qkv_weight
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(LlamaConfig, self.config)
-
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The Llama model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- config.max_position_embeddings,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_kv_heads": self.decoder_num_kv_attention_heads,
- "num_layers": self.decoder_layer_num,
- "ff_intermediate_size": self.decoder_ff_intermediate_size,
- "max_length": config.max_position_embeddings,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "rope_theta": self.rotary_emb_base,
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "llama"
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for transformer blocks in LLaMA."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.weight",
- f"{layer_prefix}self_attn.k_proj.weight",
- f"{layer_prefix}self_attn.v_proj.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.o_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}post_attention_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.gate_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_gate/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.up_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.down_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for non-transformer blocks in LLaMA."""
- return [
- ConvertInfo(
- param_names=["model.embed_tokens.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.norm.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.weight"],
- data_type=self.data_type,
-                converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ]
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before LLaMA's transformer block number."""
- return "model.layers."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in LLaMA."""
- return cast(LlamaConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in LLaMA."""
- return cast(LlamaConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in LLaMA."""
- return cast(LlamaConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in LLaMA."""
- config = cast(LlamaConfig, self.config)
- if config.num_key_value_heads is None:
- return self.decoder_num_attention_heads
- return config.num_key_value_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of LLaMA."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of the linear layer in LLaMA MLP."""
-        return cast(LlamaConfig, self.config).intermediate_size
-
- @property
- def rotary_dim(self) -> int:
- """The rotary embedding dimension of LLaMA."""
- return self.decoder_head_size
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary embedding base of LLaMA."""
- return cast(LlamaConfig, self.config).rope_theta
diff --git a/friendli/modules/converter/models/mistral.py b/friendli/modules/converter/models/mistral.py
deleted file mode 100644
index bfc9e75b..00000000
--- a/friendli/modules/converter/models/mistral.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Mistral Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-from typing import Any, Dict, cast
-
-from transformers import MistralConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.models.llama import (
- LlamaForCausalLMConverter,
- LlamaForCausalLMLoraConverter,
-)
-
-
-class MistralForCausalLMLoraConverter(LlamaForCausalLMLoraConverter):
- """MistralForCausalLM LoRA Converter Class."""
-
-
-class MistralForCausalLMConverter(LlamaForCausalLMConverter):
- """MistralForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if Mistral architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(MistralConfig, self.config).hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(MistralConfig, self.config).hidden_act}'",
- valid_options=["silu"],
- )
- if cast(MistralConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
-
- if cast(MistralConfig, self.config).rms_norm_eps not in (1e-5, 1e-6):
- raise NotSupportedCheckpointError(
- invalid_option=f"'rms_norm_eps={cast(MistralConfig, self.config).rms_norm_eps}'",
- valid_options=[1e-5, 1e-6],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(MistralConfig, self.config)
-
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The Mistral model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- config.max_position_embeddings,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_kv_heads": self.decoder_num_kv_attention_heads,
- "num_layers": self.decoder_layer_num,
- "ff_intermediate_size": self.decoder_ff_intermediate_size,
- "max_length": config.max_position_embeddings,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
-            "attention_window_size": self.attention_window_size,  # for sliding window
- "rope_theta": self.rotary_emb_base,
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "mistral"
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in Mistral."""
- return cast(MistralConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in Mistral."""
- return cast(MistralConfig, self.config).hidden_size
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of linear layer in Mistral MLP."""
- return cast(MistralConfig, self.config).intermediate_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in Mistral."""
- return cast(MistralConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in Mistral."""
- config = cast(MistralConfig, self.config)
- if config.num_key_value_heads is None:
- return self.decoder_num_attention_heads
- return config.num_key_value_heads
-
- @property
- def attention_window_size(self) -> int:
- """The size of sliding window attention in Mistral."""
- return cast(MistralConfig, self.config).sliding_window
diff --git a/friendli/modules/converter/models/mixtral.py b/friendli/modules/converter/models/mixtral.py
deleted file mode 100644
index 5cf5a366..00000000
--- a/friendli/modules/converter/models/mixtral.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Mixtral Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, Optional, cast
-
-from transformers import MixtralConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.models.llama import LlamaForCausalLMConverter
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class MixtralForCausalLMConverter(LlamaForCausalLMConverter):
- """MixtralForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if Mixtral architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(MixtralConfig, self.config).hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(MixtralConfig, self.config).hidden_act}'",
- valid_options=["silu"],
- )
- if cast(MixtralConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if cast(MixtralConfig, self.config).num_local_experts != 8:
- raise NotSupportedCheckpointError(
-                    invalid_option=f"'num_local_experts={cast(MixtralConfig, self.config).num_local_experts}'",
- valid_options=[8],
- )
- if cast(MixtralConfig, self.config).num_experts_per_tok != 2:
- raise NotSupportedCheckpointError(
-                    invalid_option=f"'num_experts_per_tok={cast(MixtralConfig, self.config).num_experts_per_tok}'",
- valid_options=[2],
- )
-
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(MixtralConfig, self.config)
-
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The Mixtral model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- config.max_position_embeddings,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_kv_heads": self.decoder_num_kv_attention_heads,
- "num_layers": self.decoder_layer_num,
- "ff_intermediate_size": self.decoder_ff_intermediate_size,
- "max_length": config.max_position_embeddings,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "rope_theta": self.rotary_emb_base,
- "num_experts": self.num_experts,
- }
- if isinstance(self.attention_window_size, int):
- # for sliding window
- attr["attention_window_size"] = self.attention_window_size
- return attr
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for transformer blocks in Mixtral."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}input_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.weight",
- f"{layer_prefix}self_attn.k_proj.weight",
- f"{layer_prefix}self_attn.v_proj.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.o_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}post_attention_layernorm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}block_sparse_moe.gate.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}moe/router/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
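-            # Each Mixtral expert exposes w1 (gate), w3 (up), and w2 (down) projections;
-            # they map to Friendli's mlp/c_gate, mlp/c_fc, and mlp/c_proj, mirroring the
-            # Llama gate_proj / up_proj / down_proj mapping.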
- for i in range(self.num_experts):
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}block_sparse_moe.experts.{i}.w1.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}moe/{i}/mlp/c_gate/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}block_sparse_moe.experts.{i}.w2.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}moe/{i}/mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}block_sparse_moe.experts.{i}.w3.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}moe/{i}/mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "mixtral"
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in Mixtral."""
- return cast(MixtralConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in Mixtral."""
- return cast(MixtralConfig, self.config).hidden_size
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of linear layer in Mixtral MoEs."""
- return cast(MixtralConfig, self.config).intermediate_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in Mixtral."""
- return cast(MixtralConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in Mixtral."""
- config = cast(MixtralConfig, self.config)
- if config.num_key_value_heads is None:
- return self.decoder_num_attention_heads
- return config.num_key_value_heads
-
- @property
- def attention_window_size(self) -> Optional[int]:
- """The size of sliding window attention in Mixtral."""
- return cast(MixtralConfig, self.config).sliding_window
-
- @property
- def num_experts(self) -> int:
- """The number of moe experts per transformer block in Mixtral."""
- return cast(MixtralConfig, self.config).num_local_experts
-
- @property
- def num_selected_moe_experts(self) -> int:
- """The number of selected moe experts per transformer block in Mixtral."""
- return cast(MixtralConfig, self.config).num_experts_per_tok
diff --git a/friendli/modules/converter/models/mpt.py b/friendli/modules/converter/models/mpt.py
deleted file mode 100644
index 48c332b6..00000000
--- a/friendli/modules/converter/models/mpt.py
+++ /dev/null
@@ -1,397 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli MPT Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-from transformers import ( # type: ignore[import]
- GenerationConfig,
- MptConfig,
- PretrainedConfig,
-)
-
-from friendli.enums import ModelDataType # type: ignore[import]
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- DecoderOnlyConverter,
- DecoderOnlyLoraConverter,
-)
-from friendli.modules.converter.schema import ConvertInfo
-
-
-def safe_attn_config_get(attn_config: Dict[str, Any], key: str) -> Any:
- """Safe getter from MptAttentionConfig.
-
-    This function is a temporary helper because MptAttentionConfig does not
-    support `attn_type="grouped_query_attention"` yet.
- """
- if key not in attn_config:
- raise CheckpointConversionError(
- f"{key} does not exist in MptAttentionConfig {attn_config}"
- )
-
- return attn_config[key]
-
-
-class MptForCausalLMLoraConverter(DecoderOnlyLoraConverter):
- """MptForCausalLM LoRA Converter Class."""
-
- @property
- def adapter_target_module_map(self) -> Dict[str, str]:
- """Return the dictionary that maps Hugging Face's module name to Friendli's module name."""
- return {
- "Wqkv": "merged-qkv",
- "out_proj": "attn_fc",
- "up_proj": "ff1",
- "down_proj": "ff2",
- "wte": "wte",
- }
-
- @property
- def adapter_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """The list of conversion information for LoRA adapter modules in Mpt."""
- convert_info_list = []
- target_modules = self.adapter_target_modules
-
- # Non-transformer modules
- if "wte" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=["transformer.wte.lora_embedding_A.default"],
- data_type=self.converter.data_type,
- converted_name="wte/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.wte.lora_embedding_B.default"],
- data_type=self.converter.data_type,
- converted_name="wte/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- # Transformer modules
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.converter.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
-
- if "merged-qkv" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.Wqkv.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.Wqkv.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "attn_fc" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.out_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}attn.out_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "ff1" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.up_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.up_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- if "ff2" in target_modules:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.down_proj.lora_A.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_A/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}mlp.down_proj.lora_B.default.weight"
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/lora/lora_B/weight:0",
- reshape_fn=self.lora_weight_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
-
-class MPTForCausalLMConverter(DecoderOnlyConverter):
- """MPTForCausalLM Architectures Converter Class."""
-
- def __init__(
- self,
- config: PretrainedConfig,
- generation_config: GenerationConfig | None,
- data_type: ModelDataType,
- ) -> None:
- """Initialize MPTForCausalLMConverter."""
- super().__init__(config, generation_config, data_type)
- attn_config = cast(MptConfig, config).attn_config
- if isinstance(attn_config, PretrainedConfig):
- attn_config = attn_config.to_dict() # type: ignore
- self.attn_config = attn_config
-
- def check_config(self) -> None:
- """Check if MPT architectures' config can be converted to Friendli format."""
- super().check_config()
-
- if not safe_attn_config_get(self.attn_config, "alibi"):
- raise NotSupportedCheckpointError(
- invalid_option=f"'alibi={safe_attn_config_get(self.attn_config, 'alibi')}'",
- valid_options=[True],
- )
-
- if safe_attn_config_get(self.attn_config, "alibi_bias_max") != 8:
- raise NotSupportedCheckpointError(
- invalid_option=f"'alibi={safe_attn_config_get(self.attn_config, 'alibi_bias_max')}'",
- valid_options=[8],
- )
-
- if safe_attn_config_get(self.attn_config, "attn_type") != "multihead_attention":
- if (
- safe_attn_config_get(self.attn_config, "attn_type")
- == "grouped_query_attention"
- ):
- raise CheckpointConversionError(
- msg="MptAttentionConfig does not support `attn_type=`grouped_query_attention`` yet (as of transformers==4.35.2).",
- )
- raise NotSupportedCheckpointError(
- invalid_option=f"'attn_type={safe_attn_config_get(self.attn_config, 'attn_type')}'",
- valid_options=["multihead_attention"],
- )
-
- if safe_attn_config_get(self.attn_config, "prefix_lm"):
- raise NotSupportedCheckpointError(
- invalid_option=f"'prefix_lm={safe_attn_config_get(self.attn_config, 'prefix_lm')}'",
- valid_options=[False],
- )
-
- if safe_attn_config_get(self.attn_config, "qk_ln"):
- raise NotSupportedCheckpointError(
- invalid_option=f"'qk_ln={safe_attn_config_get(self.attn_config, 'qk_ln')}'",
- valid_options=[False],
- )
-
- if safe_attn_config_get(self.attn_config, "softmax_scale") is not None:
- raise NotSupportedCheckpointError(
- invalid_option=f"'softmax_scale={safe_attn_config_get(self.attn_config, 'softmax_scale')}'",
- valid_options=[None],
- )
-
- if cast(MptConfig, self.config).expansion_ratio != 4:
- raise NotSupportedCheckpointError(
- invalid_option=(
- f"'expansion_ratio={cast(MptConfig, self.config).expansion_ratio}'"
- ),
- valid_options=[4],
- )
-
- if not cast(MptConfig, self.config).no_bias:
- raise NotSupportedCheckpointError(
- invalid_option=f"'no_bias={cast(MptConfig, self.config).no_bias}'",
- valid_options=[True],
- )
-
- if cast(MptConfig, self.config).logit_scale is not None:
- raise NotSupportedCheckpointError(
- invalid_option=(
- f"'logit_scale={cast(MptConfig, self.config).logit_scale}'"
- ),
- valid_options=[None],
- )
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in MPT."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}norm_1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}norm_2.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.Wqkv.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ffn.up_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ffn.down_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in MPT."""
- return [
- ConvertInfo(
- param_names=["transformer.wte.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["transformer.norm_f.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ]
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The MPT model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- cast(MptConfig, self.config).max_seq_len,
- )
-
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "num_heads": self.decoder_num_attention_heads,
- "num_kv_heads": self.decoder_num_kv_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": cast(MptConfig, self.config).max_seq_len,
- "vocab_size": cast(MptConfig, self.config).vocab_size,
- "clip_qkv": safe_attn_config_get(self.attn_config, "clip_qkv") or 0.0,
- "eos_token": self.get_eos_token_id() or "FILL ME",
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "mpt"
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before the MPT's transformer block number."""
- return "transformer.blocks."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in MPT."""
- return cast(MptConfig, self.config).n_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in MPT."""
- return cast(MptConfig, self.config).d_model
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in MPT."""
- return cast(MptConfig, self.config).n_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in MPT."""
- if "kv_n_heads" in self.attn_config:
- return self.attn_config["kv_n_heads"]
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of MPT."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of the linear layer in MPT MLP."""
- return self.decoder_hidden_size * 4
diff --git a/friendli/modules/converter/models/opt.py b/friendli/modules/converter/models/opt.py
deleted file mode 100644
index 6d8ad8aa..00000000
--- a/friendli/modules/converter/models/opt.py
+++ /dev/null
@@ -1,292 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli OPT Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import OPTConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import DECODER_PREFIX, DecoderOnlyConverter
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class OPTForCausalLMConverter(DecoderOnlyConverter):
- """OPTForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if OPT architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(OPTConfig, self.config).activation_function not in ["relu"]:
- raise NotSupportedCheckpointError(
- invalid_option="'activation_function="
- f"{cast(OPTConfig, self.config).activation_function}'",
- valid_options=["relu"],
- )
- if cast(OPTConfig, self.config).do_layer_norm_before is not True:
- raise NotSupportedCheckpointError(
- invalid_option=f"'do_layer_norm_before={False}'",
- valid_options=[True],
- )
- if (
- cast(OPTConfig, self.config).word_embed_proj_dim
- != cast(OPTConfig, self.config).hidden_size
- ):
- raise NotSupportedCheckpointError(
- invalid_option="'word_embed_proj_dim"
- f"({cast(OPTConfig, self.config).word_embed_proj_dim}) "
- f"!= hidden_size({cast(OPTConfig, self.config).hidden_size})'",
- valid_options=[
- f"'word_embed_proj_dim({cast(OPTConfig, self.config).hidden_size}) "
- f"== hidden_size({cast(OPTConfig, self.config).hidden_size})'"
- ],
- )
- if cast( # pylint: disable=protected-access
- OPTConfig, self.config
- )._remove_final_layer_norm:
- raise NotSupportedCheckpointError(
- invalid_option=f"'_remove_final_layer_norm={True}'",
- valid_options=[False],
- )
- if not cast(OPTConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option=f"'tie_word_embeddings={False}'",
- valid_options=[True],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def pos_embed_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """Positional embedding weight convert for OPT's decoder."""
- assert len(params) == 1
- pos_emb = params[0]
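- # HF's OPT uses a learned positional embedding with an offset of 2 added to every
- # position index, so the first two rows of the table are never used by real positions;
- # they are dropped below to obtain a plain 0-indexed position embedding.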
- pos_emb = pos_emb[2:, :] # offset pos emb
-
- return pos_emb
-
- def qkv_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """qkv_weight_reshape for OPT's attention layer."""
- qkv_weight = torch.cat(
- params,
- dim=0,
- )
- qkv_weight = qkv_weight.transpose(0, 1)
- return qkv_weight
-
- def qkv_bias_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """qkv_bias_reshape for OPT's attention layer."""
- qkv_bias = torch.cat(
- params,
- dim=0,
- )
- return qkv_bias
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(OPTConfig, self.config)
-
- logger.warn(
- "Since OPT uses absolute position embedding, 'max_length' cannot be "
- "larger than %d.",
- config.max_position_embeddings,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "num_heads": self.decoder_num_attention_heads,
- "num_layers": self.decoder_layer_num,
- "max_length": config.max_position_embeddings,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "opt"
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in OPT."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.bias",
- f"{layer_prefix}self_attn.k_proj.bias",
- f"{layer_prefix}self_attn.v_proj.bias",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/bias:0",
- reshape_fn=self.qkv_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.out_proj.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}final_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}final_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc1.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc2.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc2.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}fc1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}self_attn.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}self_attn.q_proj.weight",
- f"{layer_prefix}self_attn.k_proj.weight",
- f"{layer_prefix}self_attn.v_proj.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in OPT."""
- return [
- ConvertInfo(
- param_names=["model.decoder.embed_tokens.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.decoder.embed_positions.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/wpe/weight:0",
- reshape_fn=self.pos_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.decoder.final_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["model.decoder.final_layer_norm.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ]
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before OPT's transformer block number."""
- return "model.decoder.layers."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in OPT."""
- return cast(OPTConfig, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in OPT."""
- return cast(OPTConfig, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in OPT."""
- return cast(OPTConfig, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in opt."""
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of OPT."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of the linear layer in codegen OPT."""
- return self.decoder_hidden_size * 4
diff --git a/friendli/modules/converter/models/phi3.py b/friendli/modules/converter/models/phi3.py
deleted file mode 100644
index 3f05b8bd..00000000
--- a/friendli/modules/converter/models/phi3.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Cohere Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-from typing import cast
-
-from transformers.models.phi3 import Phi3Config # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.modules.converter.base import FP8OnlyConverter
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-
-
-class Phi3ForCausalLMConverter(FP8OnlyConverter, RotaryEmbeddingConversionInterface):
- """Phi3ForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if phi3 architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if cast(Phi3Config, self.config).hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(Phi3Config, self.config).hidden_act}'",
- valid_options=["silu"],
- )
- if cast(Phi3Config, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "phi3"
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before phi3's transformer block number."""
- return "model.layers."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in phi3."""
- return cast(Phi3Config, self.config).num_hidden_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in phi3."""
- return cast(Phi3Config, self.config).hidden_size
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in phi3."""
- return cast(Phi3Config, self.config).num_attention_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in phi3."""
- config = cast(Phi3Config, self.config)
- if config.num_key_value_heads is None:
- return self.decoder_num_attention_heads
- return config.num_key_value_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of phi3."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of the linear layer in phi3 MLP."""
- return cast(Phi3Config, self.config).intermediate_size
-
- @property
- def rotary_dim(self) -> int:
- """The rotary embedding dimension of phi3."""
- return self.decoder_head_size
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary embedding base of phi3."""
- return cast(Phi3Config, self.config).rope_theta
diff --git a/friendli/modules/converter/models/phi_msft.py b/friendli/modules/converter/models/phi_msft.py
deleted file mode 100644
index 493c4402..00000000
--- a/friendli/modules/converter/models/phi_msft.py
+++ /dev/null
@@ -1,369 +0,0 @@
-# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Phi Checkpoint Converter."""
-
-
-from __future__ import annotations
-
-import math
-from typing import Any, Dict, List, Optional, cast
-
-import torch
-from transformers import PretrainedConfig # type: ignore[import]
-
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- SUPPORTED_GELU_FAMILY,
- DecoderOnlyConverter,
-)
-from friendli.modules.converter.interface import RotaryEmbeddingConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import convert_to_gpt_j_params
-
-
-class PhiMsftConfig(PretrainedConfig):
- """Phi msft configuration. Different from the HuggingFace PhiConfig."""
-
- model_type = "phi"
- attribute_map = {
- "max_position_embeddings": "n_positions",
- "hidden_size": "n_embd",
- "num_attention_heads": "n_head",
- "num_hidden_layers": "n_layer",
- }
-
- def __init__(
- self,
- vocab_size: int = 50304,
- n_positions: int = 2048,
- n_embd: int = 1024,
- n_layer: int = 20,
- n_inner: Optional[int] = None,
- n_head: int = 16,
- n_head_kv: Optional[int] = None,
- rotary_dim: Optional[int] = 32,
- activation_function: Optional[str] = "gelu_new",
- flash_attn: bool = False,
- flash_rotary: bool = False,
- fused_dense: bool = False,
- attn_pdrop: float = 0.0,
- embd_pdrop: float = 0.0,
- resid_pdrop: float = 0.0,
- layer_norm_epsilon: float = 1e-5,
- initializer_range: float = 0.02,
- tie_word_embeddings: bool = False,
- pad_vocab_size_multiple: int = 64,
- **kwargs,
- ) -> None:
- """Initalize the configuration for a phi-msft model."""
- self.vocab_size = int(
- math.ceil(vocab_size / pad_vocab_size_multiple) * pad_vocab_size_multiple
- )
- self.n_positions = n_positions
- self.n_embd = n_embd
- self.n_layer = n_layer
- self.n_inner = n_inner
- self.n_head = n_head
- self.n_head_kv = n_head_kv
- self.rotary_dim = min(rotary_dim, n_embd // n_head) # type: ignore[type-var]
- self.activation_function = activation_function
- self.flash_attn = flash_attn
- self.flash_rotary = flash_rotary
- self.fused_dense = fused_dense
- self.attn_pdrop = attn_pdrop
- self.embd_pdrop = embd_pdrop
- self.resid_pdrop = resid_pdrop
- self.layer_norm_epsilon = layer_norm_epsilon
- self.initializer_range = initializer_range
-
- super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
-
-
-class PhiForCausalLMConverter(DecoderOnlyConverter):
- """PhiForCausalLM Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if Phi architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
- if (
- cast(PhiMsftConfig, self.config).activation_function
- not in SUPPORTED_GELU_FAMILY
- ):
- raise NotSupportedCheckpointError(
- invalid_option="'activation_function="
- f"{cast(PhiMsftConfig, self.config).activation_function}'",
- valid_options=SUPPORTED_GELU_FAMILY,
- )
- if cast(PhiMsftConfig, self.config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def qkv_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """qkv_weight_reshape for Phi's attention layer."""
- assert len(params) == 1
- qkv_weight = params[0]
-
- q_size = self.decoder_num_attention_heads * self.decoder_head_size
- kv_size = self.decoder_num_kv_attention_heads * self.decoder_head_size
- q_weight, k_weight, v_weight = torch.split(
- qkv_weight, [q_size, kv_size, kv_size], dim=0
- )
-
- q_weight = q_weight.reshape(
- self.decoder_num_attention_heads,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = k_weight.reshape(
- self.decoder_num_kv_attention_heads,
- self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim)
- k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim)
-
- q_weight = q_weight.reshape(
- self.decoder_num_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
- k_weight = k_weight.reshape(
- self.decoder_num_kv_attention_heads * self.decoder_head_size,
- self.decoder_hidden_size,
- )
-
- qkv_weight = torch.cat([q_weight, k_weight, v_weight], dim=0)
- qkv_weight = qkv_weight.transpose(0, -1)
- return qkv_weight
-
- def qkv_bias_reshape(self, params: List[torch.Tensor]) -> torch.Tensor:
- """qkv_bias_reshape for Phi's attention layer."""
- assert len(params) == 1
- qkv_bias = params[0]
-
- q_size = self.decoder_num_attention_heads * self.decoder_head_size
- kv_size = self.decoder_num_kv_attention_heads * self.decoder_head_size
-
- q_bias, k_bias, v_bias = torch.split(
- qkv_bias, [q_size, kv_size, kv_size], dim=0
- )
-
- q_bias = q_bias.reshape(
- self.decoder_num_attention_heads, self.decoder_head_size
- )
- k_bias = k_bias.reshape(
- self.decoder_num_kv_attention_heads, self.decoder_head_size
- )
-
- q_bias = convert_to_gpt_j_params(q_bias, self.rotary_dim).flatten()
- k_bias = convert_to_gpt_j_params(k_bias, self.rotary_dim).flatten()
-
- qkv_bias = torch.cat((q_bias, k_bias, v_bias), dim=0)
- return qkv_bias
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(PhiMsftConfig, self.config)
-
- logger.info(
- "The generated attributes set 'max_length' to %d, but you can change the "
- "'max_length' according to your needs. The Phi model does not rely on "
- "absolute position embeddings, allowing you to choose any suitable value.",
- config.n_positions,
- )
-
- eos_token_id = self.get_eos_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.decoder_head_size,
- "rotary_dim": self.rotary_dim,
- "num_heads": self.decoder_num_attention_heads,
- "num_kv_heads": self.decoder_num_kv_attention_heads,
- "num_layers": self.decoder_layer_num,
- "ff_intermediate_size": self.decoder_ff_intermediate_size,
- "max_length": config.n_positions,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "rope_theta": self.rotary_emb_base,
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- return "phi"
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in Phi."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ln.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}ln_1/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc1.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc1.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc2.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.fc2.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mixer.Wqkv.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mixer.Wqkv.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/bias:0",
- reshape_fn=self.qkv_bias_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mixer.out_proj.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mixer.out_proj.bias"],
- data_type=self.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/bias:0",
- reshape_fn=self.linear_bias_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for non-transformer blocks in Phi."""
- return [
- ConvertInfo(
- param_names=["transformer.embd.wte.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.ln.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.ln.bias"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/beta:0",
- reshape_fn=self.ln_bias_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.linear.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ConvertInfo(
- param_names=["lm_head.linear.bias"],
- data_type=self.data_type,
- converted_name="head_fc/bias:0",
- reshape_fn=self.head_weight_reshape,
- ),
- ]
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before Phi's transformer module number."""
- return "transformer.h."
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of decoder layers in Phi."""
- return cast(PhiMsftConfig, self.config).n_layer
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size in Phi."""
- return cast(PhiMsftConfig, self.config).n_embd
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads in Phi."""
- return cast(PhiMsftConfig, self.config).n_head
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads in Phi."""
- config = cast(PhiMsftConfig, self.config)
- if config.n_head_kv is not None:
- return config.n_head_kv
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of Phi."""
- return self.decoder_hidden_size // self.decoder_num_attention_heads
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate size of the linear layer in codegen MLP."""
- config = cast(PhiMsftConfig, self.config)
- if config.n_inner is None:
- return self.decoder_hidden_size * 4
- return config.n_inner
-
- @property
- def rotary_dim(self) -> int:
- """The rotary dim in Phi."""
- return cast(PhiMsftConfig, self.config).rotary_dim # type: ignore[return-value]
-
- @property
- def rotary_emb_base(self) -> float:
- """The rotary emb base in Phi."""
- return 10000.0
diff --git a/friendli/modules/converter/models/t5.py b/friendli/modules/converter/models/t5.py
deleted file mode 100644
index ba188bd2..00000000
--- a/friendli/modules/converter/models/t5.py
+++ /dev/null
@@ -1,444 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli T5 Checkpoint Converter."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, cast
-
-import torch
-from transformers import T5Config # type: ignore[import]
-
-from friendli.enums import ModelDataType
-from friendli.errors import CheckpointConversionError, NotSupportedCheckpointError
-from friendli.logging import logger
-from friendli.modules.converter.base import (
- DECODER_PREFIX,
- ENCODER_PREFIX,
- EncoderDecoderConverter,
-)
-from friendli.modules.converter.schema import ConvertInfo
-
-
-class T5Converter(EncoderDecoderConverter):
- """T5ForConditionalGeneration Architectures Converter Class."""
-
- def check_config(self) -> None:
- """Check if T5 architectures' config can be converted to Friendli format."""
- super().check_config()
- try:
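- # T5 v1.1-style checkpoints use a gated activation with untied embeddings, while the
- # original T5 uses a non-gated activation with tied embeddings, so exactly one of the
- # two flags is expected to be set (hence the XOR below).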
- if not (
- cast(T5Config, self.config).is_gated_act
- ^ cast(T5Config, self.config).tie_word_embeddings
- ):
- raise NotSupportedCheckpointError(
- invalid_option=f"'is_gated_act={cast(T5Config, self.config).is_gated_act}'and "
- f"'tie_word_embeddings={cast(T5Config, self.config).tie_word_embeddings}'",
- valid_options=[
- "'is_gated_act' and 'tie_word_embeddings' should be different."
- ],
- )
-
- if cast(T5Config, self.config).layer_norm_epsilon != 1e-6:
- raise NotSupportedCheckpointError(
- invalid_option="'layer_norm_epsilon="
- f"{cast(T5Config, self.config).layer_norm_epsilon}'",
- valid_options=[1e-6],
- )
- except AttributeError as exc:
- raise CheckpointConversionError(str(exc)) from exc
-
- def _decoder_final_ln_weight_reshape(
- self, params: List[torch.Tensor]
- ) -> torch.Tensor:
- """Special handle for T5."""
- assert len(params) == 1
- param = params[0]
-
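- # With tied embeddings, the HF T5 decoder rescales its output by d_model ** -0.5
- # before the LM head; folding that scale into the final layer-norm weight here keeps
- # the converted checkpoint equivalent without a separate scaling step.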
- if cast(T5Config, self.config).tie_word_embeddings:
- param = param * (cast(T5Config, self.config).d_model ** -0.5)
-
- return param
-
- def pos_embed_weight_reshape(
- self,
- params: List[torch.Tensor],
- ) -> torch.Tensor:
- """Reshape positional embedding weights in T5."""
- assert len(params) == 1
- return params[0]
-
- def get_attributes(self) -> Dict[str, Any]:
- """Get checkpoint attributes."""
- config = cast(T5Config, self.config)
-
- logger.warn(
- "The 'max_input_length' and 'max_output_length' fields are left blank as "
- "they cannot be automatically configured. "
- "Determine the 'max_input_length' and 'max_output_length' according to your "
- "needs. The T5 model does not rely on absolute position embeddings, "
- "allowing you to choose any suitable value."
- )
-
- eos_token_id = self.get_eos_token_id()
- decoder_start_token_id = self.get_decoder_start_token_id()
- attr = {
- "model_type": self.model_type,
- "dtype": self.data_type.value,
- "head_size": self.encoder_head_size,
- "num_heads": self.encoder_num_attention_heads,
- "hidden_size": self.encoder_hidden_size,
- "ff_intermediate_size": self.decoder_ff_intermediate_size,
- "num_encoder_layers": self.encoder_layer_num,
- "num_decoder_layers": self.decoder_layer_num,
- "max_input_length": "FILL ME",
- "max_output_length": "FILL ME",
- "num_pos_emb_buckets": config.relative_attention_num_buckets,
- "max_pos_distance": config.relative_attention_max_distance,
- "vocab_size": config.vocab_size,
- "eos_token": eos_token_id if eos_token_id is not None else "FILL ME",
- "decoder_start_token": (
- decoder_start_token_id
- if decoder_start_token_id is not None
- else "FILL ME"
- ),
- }
- return attr
-
- @property
- def model_type(self) -> str:
- """Model type."""
- if cast(T5Config, self.config).is_gated_act:
- return "t5-v1_1"
- return "t5"
-
- @property
- def encoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in T5's encoder."""
- convert_info_list = []
- for i in range(self.encoder_layer_num):
- layer_prefix = f"{self.encoder_layer_prefix}{i}."
- converted_prefixe = f"{ENCODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.0.layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.1.layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.0.SelfAttention.q.weight",
- f"{layer_prefix}layer.0.SelfAttention.k.weight",
- f"{layer_prefix}layer.0.SelfAttention.v.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.0.SelfAttention.o.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
-
- if cast(T5Config, self.config).is_gated_act:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.1.DenseReluDense.wi_0.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_gate/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.1.DenseReluDense.wi_1.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.1.DenseReluDense.wo.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- else:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.1.DenseReluDense.wi.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.1.DenseReluDense.wo.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
- @property
- def decoder_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The list of conversion informations for transformer blocks in T5's decoder."""
- convert_info_list = []
- for i in range(self.decoder_layer_num):
- layer_prefix = f"{self.decoder_layer_prefix}{i}."
- converted_prefixe = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.0.layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}ln_1/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.1.layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}ln_2/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.2.layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}ln_3/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.0.SelfAttention.q.weight",
- f"{layer_prefix}layer.0.SelfAttention.k.weight",
- f"{layer_prefix}layer.0.SelfAttention.v.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.0.SelfAttention.o.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.1.EncDecAttention.q.weight",
- f"{layer_prefix}layer.1.EncDecAttention.k.weight",
- f"{layer_prefix}layer.1.EncDecAttention.v.weight",
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}cross_attn/c_attn/weight:0",
- reshape_fn=self.qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}layer.1.EncDecAttention.o.weight"],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}cross_attn/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
-
- if cast(T5Config, self.config).is_gated_act:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.2.DenseReluDense.wi_0.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_gate/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.2.DenseReluDense.wi_1.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.2.DenseReluDense.wo.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
- else:
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.2.DenseReluDense.wi.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_fc/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}layer.2.DenseReluDense.wo.weight"
- ],
- data_type=self.data_type,
- converted_name=f"{converted_prefixe}mlp/c_proj/weight:0",
- reshape_fn=self.linear_weight_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
- @property
- def non_transformer_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """The convert_info_list for non-transformer blocks in T5."""
- convert_info_list = [
- ConvertInfo(
- param_names=[f"shared.weight"],
- data_type=self.data_type,
- converted_name="wte/weight:0",
- reshape_fn=self.token_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"
- ],
- data_type=ModelDataType.FP32,
- converted_name=f"{ENCODER_PREFIX}/wpe/weight:0",
- reshape_fn=self.pos_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=[
- "decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"
- ],
- data_type=ModelDataType.FP32,
- converted_name=f"{DECODER_PREFIX}/wpe/weight:0",
- reshape_fn=self.pos_embed_weight_reshape,
- ),
- ConvertInfo(
- param_names=["encoder.final_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{ENCODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=["decoder.final_layer_norm.weight"],
- data_type=self.data_type,
- converted_name=f"{DECODER_PREFIX}/ln_f/gamma:0",
- reshape_fn=self._decoder_final_ln_weight_reshape,
- ),
- ]
-
- if not cast(T5Config, self.config).tie_word_embeddings:
- convert_info_list.append(
- ConvertInfo(
- param_names=["lm_head.weight"],
- data_type=self.data_type,
- converted_name="head_fc/weight:0",
- reshape_fn=self.head_weight_reshape,
- )
- )
-
- return convert_info_list
-
- @property
- def encoder_layer_prefix(self) -> str:
- """The layer name prefix used before T5 encoder's transformer block number."""
- return "encoder.block."
-
- @property
- def decoder_layer_prefix(self) -> str:
- """The layer name prefix used before T5 decoder's transformer block number."""
- return "decoder.block."
-
- @property
- def encoder_layer_num(self) -> int:
- """The number of transformer blocks in T5 encoder."""
- return cast(T5Config, self.config).num_layers
-
- @property
- def encoder_hidden_size(self) -> int:
- """The hidden size of T5 encoder."""
- return cast(T5Config, self.config).d_model
-
- @property
- def encoder_num_attention_heads(self) -> int:
- """The number of attention heads of T5 encoder."""
- return cast(T5Config, self.config).num_heads
-
- @property
- def encoder_head_size(self) -> int:
- """The head size of T5 encoder."""
- return cast(T5Config, self.config).d_kv
-
- @property
- def encoder_ff_intermediate_size(self) -> int:
- """The intermediate of the linear layer in T5 encoder's MLP."""
- return cast(T5Config, self.config).d_ff
-
- @property
- def decoder_layer_num(self) -> int:
- """The number of transformer blocks in T5 decoder."""
- return cast(T5Config, self.config).num_decoder_layers
-
- @property
- def decoder_hidden_size(self) -> int:
- """The hidden size of T5 decoder."""
- return cast(T5Config, self.config).d_model
-
- @property
- def decoder_num_attention_heads(self) -> int:
- """The number of attention heads of T5 decoder."""
- return cast(T5Config, self.config).num_heads
-
- @property
- def decoder_num_kv_attention_heads(self) -> int:
- """The number of key-value attention heads of t5 decoder."""
- return self.decoder_num_attention_heads
-
- @property
- def decoder_head_size(self) -> int:
- """The head size of T5 decoder."""
- return cast(T5Config, self.config).d_kv
-
- @property
- def decoder_ff_intermediate_size(self) -> int:
- """The intermediate of the linear layer in T5 decoder's MLP."""
- return cast(T5Config, self.config).d_ff
diff --git a/friendli/modules/converter/saver.py b/friendli/modules/converter/saver.py
deleted file mode 100644
index e9d6d2ae..00000000
--- a/friendli/modules/converter/saver.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved.
-
-"""Savers to save a converted checkpoints into various file types."""
-
-from __future__ import annotations
-
-import json
-import os
-from abc import abstractmethod
-from contextlib import AbstractContextManager
-from typing import Dict, Generic, List, TypeVar, Union
-
-import h5py # type: ignore[import]
-import numpy as np
-import safetensors.numpy # type: ignore[import]
-import safetensors.torch # type: ignore[import]
-import torch
-from typing_extensions import Self
-
-from friendli.enums import CheckpointFileType
-from friendli.errors import CheckpointConversionError
-from friendli.logging import logger
-
-
-def get_saver(
- ckpt_file_type: CheckpointFileType, output_dir: str, output_file_name: str
-) -> CheckpointSaver:
- """Create a saver that corresponds to the file type."""
- if ckpt_file_type == CheckpointFileType.HDF5:
- return HDF5Saver(output_dir, output_file_name)
- if ckpt_file_type == CheckpointFileType.SAFETENSORS:
- return SafetensorsSaver(output_dir, output_file_name)
- raise CheckpointConversionError(
- f"Output file type {ckpt_file_type} is not supported."
- )
-
-
-class CheckpointSaver(AbstractContextManager):
- """Abstract for savers."""
-
- def __init__(
- self, output_dir: Union[str, os.PathLike], output_file_name: str
- ) -> None:
- """Check that the output file already exists."""
- super().__init__()
- self._output_dir = output_dir
- self._output_file_name = output_file_name
-
- @abstractmethod
- def save_tensor(self, tensor_id: str, t: Union[np.ndarray, torch.Tensor]) -> None:
- """Save the tensor in the file."""
- raise NotImplementedError
-
- @abstractmethod
- def close(self) -> None:
- """Close the output checkpoint file."""
- raise NotImplementedError
-
- def __enter__(self) -> Self:
- """Enter for context manager."""
- return self
-
- def __exit__(self, *exc) -> None:
- """Exit for context manager."""
- self.close()
-
-
-class HDF5Saver(CheckpointSaver):
- """Saver for HDF5."""
-
- def __init__(self, output_dir: str, output_file_name: str) -> None:
- """Create a HDF5 file."""
- super().__init__(output_dir, output_file_name)
- self._out_f = h5py.File(os.path.join(output_dir, output_file_name), "w")
-
- def save_tensor(self, tensor_id: str, t: Union[np.ndarray, torch.Tensor]) -> None:
- """Create a group if not exists, and save the tensor in the file."""
- assert isinstance(t, np.ndarray)
- self._out_f[tensor_id] = t
-
- def close(self) -> None:
- """Close the HDF5 file."""
- self._out_f.close()
-
-
-T = TypeVar("T")
-
-
-class SafetensorsSaverInterface(Generic[T]):
- """Interface for saving safetensor format."""
-
- def get_weight_size(self, tensor: T) -> int:
- """Get total weight size in `Byte` unit."""
- raise NotImplementedError
-
- def save_file(self, tensor: Dict[str, T], path: str) -> None:
- """Save given tensor to path."""
- raise NotImplementedError
-
-
-class TorchSafetensorsSaverInterface(SafetensorsSaverInterface[torch.Tensor]):
- """Interface for saving safetensor format."""
-
- def get_weight_size(self, tensor: torch.Tensor) -> int:
- """Get total weight size in `Byte` unit."""
- return tensor.itemsize * tensor.numel()
-
- def save_file(self, tensor: Dict[str, torch.Tensor], path: str) -> None:
- """Save given tensor to path."""
- safetensors.torch.save_file(tensor, path)
-
-
-class NumpySafetensorsSaverInterface(SafetensorsSaverInterface[np.ndarray]):
- """Interface for saving safetensor format."""
-
- def get_weight_size(self, tensor: np.ndarray) -> int:
- """Get total weight size in `Byte` unit."""
- return tensor.itemsize * tensor.size
-
- def save_file(self, tensor: Dict[str, np.ndarray], path: str) -> None:
- """Save given tensor to path."""
- safetensors.numpy.save_file(tensor, path)
-
-
-class UnionSafetensorsSaverInterface(
- SafetensorsSaverInterface[Union[torch.Tensor, np.ndarray]]
-):
- """Interface for saving safetensor format."""
-
- def __init__(self) -> None:
- """Initialize UnionSafetensorsSaverInterface."""
- self._sub_itfcs = {
- np.ndarray: NumpySafetensorsSaverInterface(),
- torch.Tensor: TorchSafetensorsSaverInterface(),
- }
- super().__init__()
-
- def get_weight_size(self, tensor: Union[torch.Tensor, np.ndarray]) -> int:
- """Get total weight size in `Byte` unit."""
- return self._sub_itfcs[type(tensor)].get_weight_size(tensor) # type: ignore[attr-defined]
-
- def save_file(
- self, tensor: Dict[str, Union[torch.Tensor, np.ndarray]], path: str
- ) -> None:
- """Save given tensor to path."""
- if len(tensor) == 0:
- logger.warn("No tensor to save. Skip saving tensors..")
- return
- # NOTE: Assume that all tensors are the same type
- tensor_type = type(next(iter(tensor.values())))
- itfc = self._sub_itfcs[tensor_type]
- itfc.save_file(tensor, path) # type: ignore[attr-defined]
-
-
-class SafetensorsSaver(CheckpointSaver):
- """Saver for Safetensors.
-
- This temporarily keeps the converted tensors in memory. All tensors are then
- written to the file at once when close() is called, because Safetensors does
- not support streaming writes.
- """
-
- def __init__(
- self, output_dir: Union[str, os.PathLike], output_file_name: str
- ) -> None:
- """Initialize a saver."""
- super().__init__(output_dir, output_file_name)
- self._tensors: Dict[str, Union[np.ndarray, torch.Tensor]] = {}
- self._saver: UnionSafetensorsSaverInterface = UnionSafetensorsSaverInterface()
-
- def save_tensor(self, tensor_id: str, t: Union[np.ndarray, torch.Tensor]) -> None:
- """Save the tensor in the local memory."""
- self._tensors[tensor_id] = t
-
- def shard_checkpoint(self, max_shard_size: str):
- """Shard the checkpoint with index."""
- # pylint: disable=too-many-locals
- int_max_shard_size = int(max_shard_size[:-2]) * (10**9)
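- # max_shard_size is assumed to carry a "GB" suffix (e.g. "10GB"). Tensors are packed
- # greedily: each tensor is appended to the current shard until adding it would exceed
- # the limit, at which point a new shard is started.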
- sharded_tensors: List[Dict[str, Union[np.ndarray, torch.Tensor]]] = [{}]
- last_block_size = 0
- total_size = 0
-
- for key, weight in self._tensors.items():
- weight_size = self._saver.get_weight_size(weight)
- if (
- last_block_size + weight_size > int_max_shard_size
- and len(sharded_tensors[-1]) > 0
- ):
- sharded_tensors.append({})
- last_block_size = 0
-
- sharded_tensors[-1][key] = weight
- last_block_size += weight_size
- total_size += weight_size
-
- if len(sharded_tensors) == 1:
- return {self._output_file_name: sharded_tensors[0]}, None
-
- weight_map = {}
- shards = {}
- for idx, shard in enumerate(sharded_tensors):
- shard_file = self._output_file_name.replace(
- ".safetensors",
- f"-{idx + 1:05d}-of-{len(sharded_tensors):05d}.safetensors",
- )
- shards[shard_file] = shard
- for key in shard.keys():
- weight_map[key] = shard_file
-
- metadata = {"total_size": total_size}
- index = {"metadata": metadata, "weight_map": weight_map}
- return shards, index
-
- def _save_to_file(self) -> None:
- """Save the tensors in the file."""
- logger.info("Saving the converted checkpoint...")
-
- max_shard_size = "10GB"
- shards, index = self.shard_checkpoint(max_shard_size)
-
- for shard_file, shard in shards.items():
- self._saver.save_file(shard, os.path.join(self._output_dir, shard_file))
-
- if index is None:
- path_to_weights = os.path.join(self._output_dir, self._output_file_name)
- logger.info("Model weights saved in (%s)", path_to_weights)
- else:
- save_index_file = os.path.join(
- self._output_dir, "model.safetensors.index.json"
- )
- # Save the index as well
- with open(save_index_file, "w", encoding="utf-8") as f:
- content = json.dumps(index, indent=2, sort_keys=True) + "\n"
- f.write(content)
- logger.info(
- "The model is bigger than the maximum size per checkpoint %s "
- " and is going to be split in %s checkpoint shards. You can find "
- "where each parameters has been saved in the index located at (%s).",
- max_shard_size,
- str(len(shards)),
- save_index_file,
- )
-
- def close(self) -> None:
- """Save the tensors in the file."""
- self._save_to_file()
diff --git a/friendli/modules/converter/schema.py b/friendli/modules/converter/schema.py
deleted file mode 100644
index 21f034b1..00000000
--- a/friendli/modules/converter/schema.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Converter Schema."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Callable, List
-
-import torch
-
-from friendli.enums import ModelDataType
-
-
-@dataclass
-class ConvertInfo:
- """Dataclass for convert information of the parameter in huggingface checkpoint.
-
- Args:
- param_names(List[str]): List of parameter names in the huggingface checkpoint.
- data_type(ModelDataType): Data type of the parameter.
- converted_name(str): Name of the converted parameter.
- reshape_fn(Callable[[List[torch.tensor]], np.ndarray]):
- Function to reshape the tensor from the huggignface checkpoint.
- """
-
- param_names: List[str]
- data_type: ModelDataType
- converted_name: str
- reshape_fn: Callable[[List[torch.Tensor]], torch.Tensor]
diff --git a/friendli/modules/converter/utils.py b/friendli/modules/converter/utils.py
deleted file mode 100644
index 4b9588f4..00000000
--- a/friendli/modules/converter/utils.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Converter Utils."""
-
-from __future__ import annotations
-
-import os
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
-
-import torch
-from peft import PeftConfig # type: ignore[import] # pylint: disable=import-error
-from transformers import ( # type: ignore[import]
- AutoConfig,
- AutoTokenizer,
- GenerationConfig,
- PretrainedConfig,
- PreTrainedTokenizer,
-)
-
-from friendli.enums import ModelDataType
-from friendli.errors import (
- CheckpointConversionError,
- NotFoundError,
- NotSupportedCheckpointError,
- TokenizerNotFoundError,
-)
-
-
-def convert_to_gpt_j_params(param: torch.Tensor, rotary_dim: int) -> torch.Tensor:
- """Reshape weight or bias tensor with rotary embedding to gpt-j format.
-
- Args:
- param (torch.Tensor): Target tensor to convert. Shape must be (num_heads, head_size, ...)
- rotary_dim (int): Degree of rotary embedding
-
- Returns:
- Torch tensor that heads are rotated.
-
- Raises:
- CheckpointConversionError: If arguments do not satisfy the requirements.
-
- """
- if param.ndim < 2:
- raise CheckpointConversionError(
- "Tensor dimension should be greater or equal than 2 for rotary conversion, "
- f"but got {param.ndim}"
- )
-
- head_size = param.shape[1]
- if rotary_dim > head_size:
- raise CheckpointConversionError(
- f"'rotary_dim' ({rotary_dim}) should be less or equal than 'head_size' ({head_size})"
- )
-
- param_rot = param[:, :rotary_dim]
- param_pass = param[:, rotary_dim:]
-
- origin_shape = param_rot.shape
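- # The rotary slice is laid out in two contiguous halves (GPT-NeoX style); stacking the
- # halves along a new axis and reshaping interleaves them pairwise, producing the
- # ordering that GPT-J-style rotary embeddings expect.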
- param_rot_1 = param_rot[:, : rotary_dim // 2]
- param_rot_2 = param_rot[:, rotary_dim // 2 :]
- param_rot = torch.stack((param_rot_1, param_rot_2), dim=2).reshape(*origin_shape)
-
- return torch.cat((param_rot, param_pass), dim=1)
-
-
-def get_tensor_from_state_dict(
- state_dict: Dict[str, Any], tensor_name: str
-) -> torch.Tensor:
- """Get the tensor whose name is 'tensor_name' from 'state_dict'.
-
- Args:
- state_dict (Dict[str, Any]): Model checkpoint's state_dict.
- tensor_name (str): Name of tensor to get.
-
- Returns:
- Corresponding torch Tensor.
-
- Raises:
- CheckpointConversionError: If 'tensor_name' does not exist in 'state_dict'
-
- """
- if tensor_name not in state_dict:
- raise CheckpointConversionError(
- f"Cannot find '{tensor_name}' in the model checkpoint"
- )
-
- return state_dict[tensor_name]
-
-
-def get_torch_data_type(data_type: str) -> torch.dtype:
- """Get torch data type from Enum."""
- if data_type == ModelDataType.FP16:
- return torch.float16
- if data_type == ModelDataType.FP32:
- return torch.float32
- if data_type == ModelDataType.BF16:
- return torch.bfloat16
- raise CheckpointConversionError(
- f"Can't not converted original param to {data_type}."
- )
-
-
-def get_model_data_type(torch_dtype: torch.dtype) -> ModelDataType:
- """Get torch data type from Enum."""
- if torch_dtype == torch.float16:
- return ModelDataType.FP16
- if torch_dtype == torch.float32:
- return ModelDataType.FP32
- if torch_dtype == torch.bfloat16:
- return ModelDataType.BF16
- raise CheckpointConversionError(f"{torch_dtype} is not valid dtype.")
-
-
-def convert_tensor_dtype(
- param: torch.Tensor,
- data_type: Union[ModelDataType, torch.dtype],
-) -> torch.Tensor:
- """Convert tensor format to the given data type.
-
- Args:
- param (torch.Tensor): The tensor to be converted.
- data_type (ModelDataType): The data type of the tensor.
-
- Returns:
- torch.Tensor: The converted tensor.
-
- """
- dtype_map = {
- ModelDataType.FP8_E4M3: torch.float8_e4m3fn,
- ModelDataType.BF16: torch.bfloat16,
- ModelDataType.FP16: torch.float16,
- ModelDataType.FP32: torch.float32,
- ModelDataType.INT4: torch.int8,
- ModelDataType.INT8: torch.int8,
- }
-
- dtype = dtype_map[data_type] if isinstance(data_type, ModelDataType) else data_type
-
- if dtype is torch.float8_e4m3fn:
- return param.detach().to(dtype).view(dtype=torch.int8).to("cpu")
-
- if dtype is torch.bfloat16:
- return param.detach().to(dtype).to("cpu")
-
- if data_type is ModelDataType.INT4:
- pack_num = 8 // 4
- int4_param = torch.zeros(
- (param.shape[0], param.shape[1] // pack_num),
- dtype=torch.uint8,
- device=param.device,
- )
- for col in range(int4_param.shape[1]):
- for i in range(pack_num):
- int4_param[:, col] |= param[:, col * pack_num + i] << (i * 4)
- param = int4_param
-
- return param.detach().to(dtype).to("cpu")
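A standalone sketch of the nibble packing used for `ModelDataType.INT4` above: two unsigned 4-bit values are stored per `uint8`, with column `2*col` in the low nibble and column `2*col + 1` in the high nibble.

```python
import torch

# Toy weight already quantized to unsigned 4-bit integers in [0, 15].
w = torch.tensor([[1, 15, 0, 7]], dtype=torch.uint8)

packed = torch.zeros((w.shape[0], w.shape[1] // 2), dtype=torch.uint8)
for col in range(packed.shape[1]):
    for i in range(2):  # two 4-bit values per byte
        packed[:, col] |= w[:, col * 2 + i] << (i * 4)
print(packed)  # [[241, 112]]: 241 = 1 | (15 << 4), 112 = 0 | (7 << 4)

# Unpacking recovers the original nibbles.
restored = torch.stack((packed & 0x0F, packed >> 4), dim=2).reshape(w.shape)
print(torch.equal(restored, w))  # True
```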
-
-
-def get_tokenizer(
- model_name_or_path: str,
- *,
- cache_dir: Optional[str] = None,
-) -> PreTrainedTokenizer:
- """Try to get tokenizer of a pretrained model."""
- try:
- tokenizer = AutoTokenizer.from_pretrained(
- model_name_or_path,
- cache_dir=cache_dir,
- trust_remote_code=True,
- )
- except OSError as exc:
- raise TokenizerNotFoundError(str(exc)) from exc
-
- if not tokenizer.is_fast:
- raise TokenizerNotFoundError(
- "This model does not support Friendli-compatible tokenizer"
- )
-
- if tokenizer.pad_token != "":
- tokenizer.pad_token = tokenizer.eos_token
- if tokenizer.pad_token is None:
- tokenizer.pad_token = tokenizer.eos_token
-
- return tokenizer
-
-
-def save_tokenizer(
- model_name_or_path: str,
- *,
- cache_dir: Optional[str] = None,
- save_dir: str,
-) -> Tuple[str, ...]:
- """Try to save `tokenizer.json` of a pretrained model."""
- if not os.path.isdir(save_dir):
- raise NotFoundError(f"Directory '{save_dir}' is not found.")
-
- tokenizer = get_tokenizer(model_name_or_path, cache_dir=cache_dir)
- saved_file_paths = tokenizer.save_pretrained(save_directory=save_dir)
- tokenizer_json_path = None
- for path in saved_file_paths:
- if "tokenizer.json" == os.path.basename(path):
- tokenizer_json_path = path
- break
-
- if tokenizer_json_path is None:
- raise TokenizerNotFoundError(
- "This model has the Friendli-compatible tokenizer implementation, but "
- "'tokenizer.json' file is not found."
- )
- return saved_file_paths
-
-
-def get_model_generation_config(
- model_name_or_path: str, cache_dir: Optional[str] = None
-) -> Optional[GenerationConfig]:
- """Get HuggingFace model generation config."""
- try:
- generation_config = GenerationConfig.from_pretrained(
- model_name_or_path, cache_dir=cache_dir, trust_remote_code=True
- )
- except (OSError, TypeError):
- generation_config = None
-
- return generation_config
-
-
-def get_model_pretrained_config(
- model_name_or_path: str, model_output_path: str, cache_dir: Optional[str] = None
-) -> PretrainedConfig:
- """Get HuggingFace model configs."""
- try:
- config = AutoConfig.from_pretrained(
- model_name_or_path, cache_dir=cache_dir, trust_remote_code=True
- )
- except OSError as exc: # from AutoConfig.from_pretrained()
- config_dir = Path(model_name_or_path)
- model_output_dir = Path(model_output_path).parent
- if config_dir.exists() and model_output_dir.absolute() == config_dir.absolute():
- raise NotFoundError(
- f"'output_dir' ({model_output_dir.as_posix()}) and "
- f"'model_name_or_path' ({model_name_or_path}) are the same. "
- "In such a case, checkpoints should be prepared in 'output_dir'."
- ) from exc
- raise NotFoundError(str(exc)) from exc
-
- return config
-
-
-def get_model_arch(config: PretrainedConfig) -> str:
- """Get HuggingFace model architecture from config."""
- model_arch_list = cast(List[str], cast(PretrainedConfig, config).architectures)
- if len(model_arch_list) == 0:
- raise NotSupportedCheckpointError(
- invalid_option=f"'architectures={model_arch_list}'",
- valid_options=["non empty list of architectures"],
- )
- model_arch = model_arch_list[0]
- return model_arch
-
-
-def get_adapter_config(
- adapter_name_or_path: str, cache_dir: Optional[str]
-) -> PeftConfig:
- """Get PeftConfig for Adapter."""
- try:
- adapter_config = PeftConfig.from_pretrained(
- adapter_name_or_path, cache_dir=cache_dir, trust_remote_code=True
- )
- except ValueError as exc:
- raise NotFoundError(str(exc)) from exc
- return adapter_config
diff --git a/friendli/modules/quantizer/__init__.py b/friendli/modules/quantizer/__init__.py
deleted file mode 100644
index 9d1a3117..00000000
--- a/friendli/modules/quantizer/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli model quantizer."""
diff --git a/friendli/modules/quantizer/awq/__init__.py b/friendli/modules/quantizer/awq/__init__.py
deleted file mode 100644
index 50a1020d..00000000
--- a/friendli/modules/quantizer/awq/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model AWQ Quantizer."""
diff --git a/friendli/modules/quantizer/awq/base.py b/friendli/modules/quantizer/awq/base.py
deleted file mode 100644
index 172d214c..00000000
--- a/friendli/modules/quantizer/awq/base.py
+++ /dev/null
@@ -1,513 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli AWQ Quantizer Base."""
-
-from __future__ import annotations
-
-import gc
-from abc import abstractmethod
-from dataclasses import fields
-from typing import Any, Dict, Iterator, List, Tuple, Type, cast
-
-import datasets # type: ignore[import]
-import torch
-from datasets.utils.logging import disable_progress_bar # type: ignore[import]
-from tqdm import tqdm
-
-from friendli.enums import ModelDataType
-from friendli.errors import QuantizationError
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import get_tokenizer
-from friendli.modules.quantizer.awq.utils import (
- apply_module_clip,
- apply_module_scale,
- search_module_clip,
- search_module_scale,
-)
-from friendli.modules.quantizer.base import AbstractQuantHook, CommonQuantizer
-from friendli.modules.quantizer.layers import WeightOnlyQuantizedLinearLayer
-from friendli.modules.quantizer.schema.config import AWQConfig
-from friendli.modules.quantizer.schema.data import (
- ModuleName,
- QuantInput,
- TFQuantInputs,
- TFQuantResults,
- WeightOnlyQuantResult,
-)
-from friendli.modules.quantizer.utils import (
- collect_inps,
- get_weight_only_quant_scales,
- quantized_linear_weight_reshape,
- quantized_qkv_weight_reshape,
- safe_load_datasets,
- scale_reshape,
-)
-
-
-class AWQScaler(torch.nn.Module):
- """Store AWQ scale before linear layers.
-
- If the linear layer is quantized but the preceding layer cannot absorb the
- scale, the AWQ scale is stored in this separate module instead.
- """
-
- def __init__(self, in_dim: int):
- """Initialize AWQScaler."""
- super().__init__()
- self.scale = torch.nn.Parameter(torch.ones(in_dim))
-
- def forward(self, x):
- """Scale input by AWQ scale."""
- return (x / self.scale.view(1, 1, -1)).to(x.dtype)
-
-
-class AWQHook(AbstractQuantHook):
- """Quantization Hook for AWQ."""
-
- @abstractmethod
- def iter_inspect_modules(
- self,
- block: torch.nn.Module,
- ) -> Iterator[
- Tuple[
- List[torch.nn.Module],
- List[Tuple[ModuleName, torch.nn.Linear]],
- torch.nn.Module,
- ModuleName,
- ]
- ]:
- """Returns iterator of modules to inspect for AWQ scale."""
-
- @abstractmethod
- def add_pre_scaler(
- self,
- model: torch.nn.Module,
- ) -> torch.nn.Module:
- """Add scaler for storing AWQ scale in modules."""
-
- @abstractmethod
- def get_inspect_module_types(
- self, block: torch.nn.Module
- ) -> Tuple[Type[torch.nn.Module], ...]:
- """Returns the type of inspect modules in transformer block."""
-
- def _register_pre_scaler(
- self,
- linear: torch.nn.Module,
- ) -> AWQScaler:
- """Register pre-scaler for storing AWQ scale in modules."""
- scaler = AWQScaler(linear.in_features) # type: ignore
-
- def pre_scaler_hook(_, x: Tuple[Any, ...]) -> Tuple[torch.Tensor, ...]:
- return (scaler(x[0]),)
-
- linear.register_forward_pre_hook(pre_scaler_hook)
- return scaler
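Outside the converter machinery, the hook mechanism above boils down to the following standalone sketch: a per-channel scale module is attached as a forward pre-hook, so the linear layer sees a rescaled input without its own code changing.

```python
import torch


class Scaler(torch.nn.Module):
    """Divide activations by a per-channel scale before a linear layer runs."""

    def __init__(self, in_dim: int):
        super().__init__()
        self.scale = torch.nn.Parameter(torch.ones(in_dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return (x / self.scale.view(1, 1, -1)).to(x.dtype)


torch.manual_seed(0)
linear = torch.nn.Linear(4, 2)
scaler = Scaler(linear.in_features)
with torch.no_grad():
    scaler.scale.mul_(2.0)  # non-trivial scale so the effect is visible

# The pre-hook rewrites the positional args before linear.forward sees them.
handle = linear.register_forward_pre_hook(lambda _mod, args: (scaler(args[0]),))

x = torch.randn(1, 3, 4)  # (batch, seq, hidden)
with torch.no_grad():
    y_hooked = linear(x)
    handle.remove()
    y_manual = linear(x / 2.0)
print(torch.allclose(y_hooked, y_manual))  # True
```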
-
- def get_quant_result(
- self,
- quant_inputs: TFQuantInputs,
- **kwargs: Any,
- ) -> TFQuantResults:
- """Get quantization result for AWQ."""
- awq_config = cast(AWQConfig, self.quant_config)
-
- def get_scale(
- quant_input: QuantInput,
- ) -> WeightOnlyQuantResult:
- weight, name, start, end = (
- quant_input.weight,
- quant_input.name,
- quant_input.start_offset,
- quant_input.end_offset,
- )
- weight = weight.to(awq_config.device)
-
- return get_weight_only_quant_scales(
- layer_name=name,
- w=weight[start:end],
- q_bit=awq_config.awq_args.quant_bit,
- q_group_size=awq_config.awq_args.quant_group_size,
- )
-
- return TFQuantResults(
- layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_inputs.layer_index}.",
- block=quant_inputs.block,
- q=get_scale(quant_inputs.q),
- k=get_scale(quant_inputs.k),
- v=get_scale(quant_inputs.v),
- attn_fc=get_scale(quant_inputs.attn_fc),
- ff1=get_scale(quant_inputs.ff1),
- ff2=get_scale(quant_inputs.ff2),
- )
-
- @property
- def quant_dtype(self) -> ModelDataType:
- """Return the quantization dtype."""
- quant_config = cast(AWQConfig, self.quant_config)
- awq_args = quant_config.awq_args
- if awq_args.quant_bit == 4:
- return ModelDataType.INT4
- return ModelDataType.INT8
-
- @property
- @abstractmethod
- def avoid_clipping_layer_names(self) -> List[str]:
- """Return the layer names to avoid clipping."""
-
- @property
- @abstractmethod
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified layers."""
-
- @property
- def quantized_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for quantized layers."""
- convert_info_list = []
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[
- f"{layer_prefix}q.weight_scale",
- f"{layer_prefix}k.weight_scale",
- f"{layer_prefix}v.weight_scale",
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/awq/scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}q.zeros",
- f"{layer_prefix}k.zeros",
- f"{layer_prefix}v.zeros",
- ],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_attn/awq/zero:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}q.weight",
- f"{layer_prefix}k.weight",
- f"{layer_prefix}v.weight",
- ],
- data_type=self.quant_dtype,
- converted_name=f"{converted_prefix}attn/c_attn/awq/weight:0",
- reshape_fn=quantized_qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc.weight_scale"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/awq/scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc.zeros"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}attn/c_proj/awq/zero:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc.weight"],
- data_type=self.quant_dtype,
- converted_name=f"{converted_prefix}attn/c_proj/awq/weight:0",
- reshape_fn=quantized_linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff1.weight_scale"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/awq/scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff1.zeros"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_fc/awq/zero:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff1.weight"],
- data_type=self.quant_dtype,
- converted_name=f"{converted_prefix}mlp/c_fc/awq/weight:0",
- reshape_fn=quantized_linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2.weight_scale"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/awq/scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2.zeros"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_proj/awq/zero:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2.weight"],
- data_type=self.quant_dtype,
- converted_name=f"{converted_prefix}mlp/c_proj/awq/weight:0",
- reshape_fn=quantized_linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
-
-class AWQQuantizer(CommonQuantizer):
- """Quantizer for AWQ."""
-
- def check_config(self) -> None:
- """Check if the AWQ quantization config is valid."""
- super().check_config()
- quant_config = cast(AWQConfig, self.quant_config)
- awq_args = quant_config.awq_args
- if awq_args.quant_bit not in [4, 8]:
- raise QuantizationError(
- f"Invalid quant_bit {awq_args.quant_bit} for AWQ."
- "You can only use 4 or 8 bit for AWQ."
- )
- if awq_args.quant_group_size not in [64]:
- raise QuantizationError(
- f"Invalid quant_group_size {awq_args.quant_group_size} for AWQ."
- "You can only use 64 for AWQ."
- )
-
- def get_calib_dataset(self) -> datasets.Dataset:
- """Get calibration dataset for AWQ."""
- data_cfg = self.quant_config.calibration_dataset
- tokenizer = get_tokenizer(self.converter.config.name_or_path)
- dataset = safe_load_datasets(data_cfg)
-
- def preprocess(sample) -> Dict[str, Any]:
- """Preprocess dataset for AWQ."""
- return {"input_ids": tokenizer(sample).input_ids}
-
- disable_progress_bar()
- dataset = (
- dataset.shuffle(self.quant_config.seed)
- .select(range(data_cfg.num_samples))
- .map(function=preprocess, input_columns=data_cfg.lookup_column_name)
- .filter(
- lambda sample: torch.tensor(sample).numel() != 0,
- input_columns="input_ids",
- )
- )
-
- return dataset
-
- def get_batched_samples(self):
- """Get batched samples from dataset."""
- dataset = self.get_calib_dataset()
- seqlen = self.quant_config.calibration_dataset.max_length
- samples = []
- for sample in dataset["input_ids"]:
- samples.append(torch.tensor(sample[:seqlen]))
-
- batched_samples = torch.cat(samples)
- if len(batched_samples) // seqlen == 0:
- return batched_samples.unsqueeze(0)
-
- batched_samples = [
- batched_samples[i * seqlen : (i + 1) * seqlen].unsqueeze(0)
- for i in range(len(batched_samples) // seqlen)
- ]
- batched_samples = torch.cat(batched_samples, dim=0)
- return batched_samples
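In case the tensor ops above are hard to read, a toy sketch of the same batching with made-up token ids: each sample is truncated to `seqlen`, everything is concatenated into one stream, and the stream is re-cut into full `seqlen`-length rows (the tail is dropped).

```python
import torch

seqlen = 4
token_id_lists = [[1, 2, 3, 4, 5, 6], [7, 8], [9, 10, 11]]

samples = [torch.tensor(ids[:seqlen]) for ids in token_id_lists]
stream = torch.cat(samples)  # tensor([1, 2, 3, 4, 7, 8, 9, 10, 11])

num_rows = len(stream) // seqlen
batched = torch.stack([stream[i * seqlen : (i + 1) * seqlen] for i in range(num_rows)])
print(batched)
# tensor([[ 1,  2,  3,  4],
#         [ 7,  8,  9, 10]])
```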
-
- def _apply_awq_scale_clip_block(
- self,
- block: torch.nn.Module,
- block_args: Tuple[Any, ...],
- block_kwargs: Dict[str, Any],
- ) -> None:
- """Search AWQ scale, clipping range and Apply them into a transformer block."""
- # pylint: disable=too-many-locals
-
- inspected_mod_types = cast(AWQHook, self.hook).get_inspect_module_types(block)
- args_dict, kwargs_dict = collect_inps(
- block,
- block_args,
- block_kwargs,
- self.quant_config.device,
- tuple([*self.hook.get_linear_layer_types(), *inspected_mod_types]),
- )
- awq_args = cast(AWQConfig, self.quant_config).awq_args
- for prev_ops, linear_tuples, module2inspect, module2inspect_name in cast(
- AWQHook, self.hook
- ).iter_inspect_modules(block):
- linear_inp = args_dict[linear_tuples[0][0]][0]
- linear_layers = [linear for _, linear in linear_tuples]
-
- scales = search_module_scale(
- module2inspect,
- args_dict[module2inspect_name],
- kwargs_dict[module2inspect_name],
- linear_layers,
- linear_inp,
- awq_args.quant_group_size,
- awq_args.quant_bit,
- )
-
- apply_module_scale(
- prev_ops,
- linear_layers,
- scales.to(self.quant_config.device),
- )
-
- for name, _ in linear_tuples:
- assert len(args_dict[name]) == 1
- assert torch.equal(args_dict[name][0], linear_inp)
- args_dict[name] = (args_dict[name][0].div(scales.view(1, -1)),)
-
- named_linears = {
- name: m
- for name, m in block.named_modules()
- if isinstance(m, torch.nn.Linear)
- }
- for name, linear in named_linears.items():
- if any(
- (
- avoid in name
- for avoid in cast(AWQHook, self.hook).avoid_clipping_layer_names
- )
- ):
- continue
- max_val = search_module_clip(
- linear.weight,
- args_dict[name][0],
- awq_args.quant_group_size,
- awq_args.quant_bit,
- n_sample_token=self.quant_config.calibration_dataset.num_samples,
- )
- apply_module_clip(
- max_val.to(self.quant_config.device),
- linear,
- )
-
- def get_input_kwargs_tf_blocks(
- self,
- model: torch.nn.Module,
- ) -> Tuple[List[Tuple[Any, ...]], List[Dict[str, Any]]]:
- """Gather input tensor and kwargs from the designated pytorch module."""
- block_args = []
- block_kwargs = []
-
- num_tf_blocks = len(self.hook.get_tf_blocks(model))
- progress_bar = tqdm(
- range(num_tf_blocks),
- total=num_tf_blocks,
- desc="Collect args for transformer blocks..",
- )
-
- def hook(m, args, kwargs): # pylint: disable=unused-argument
- block_args.append(
- tuple(
- (t.detach().cpu() if isinstance(t, torch.Tensor) else t)
- for t in args
- )
- )
- block_kwargs.append(
- {
- k: (v.detach().cpu() if isinstance(v, torch.Tensor) else v)
- for k, v in kwargs.items()
- }
- )
- progress_bar.update()
-
- removables = []
- for tf_block in self.hook.get_tf_blocks(model):
- removables.append(
- tf_block.register_forward_pre_hook(hook, with_kwargs=True)
- )
-
- batched_samples = self.get_batched_samples()
- model(batched_samples.to(self.quant_config.device), use_cache=False)
-
- for removable in removables:
- removable.remove()
-
- return block_args, block_kwargs
-
- def get_attributes(self) -> Dict[str, Any]:
- """Return the attributes of the converted model."""
- attributes = self.converter.get_attributes()
- awq_args = cast(AWQConfig, self.quant_config).awq_args
- attributes["quant_scheme"] = self.quant_config.mode.value # awq
- attributes["quant_group_size"] = awq_args.quant_group_size
- attributes["quant_bit"] = awq_args.quant_bit
- return attributes
-
- @torch.no_grad()
- def _apply_awq_scale_clip(
- self,
- model: torch.nn.Module,
- ) -> None:
- """Search AWQ scale, clipping range and Apply them into model."""
- # pylint: disable=too-many-locals
- model.eval()
- with self._try_offload_model(model):
- tf_blocks = self.hook.get_tf_blocks(model)
- block_args, block_kwargs = self.get_input_kwargs_tf_blocks(model)
-
- gc.collect()
- torch.cuda.empty_cache()
-
- for block, args, kwargs in tqdm(
- zip(
- tf_blocks,
- block_args,
- block_kwargs,
- ),
- total=len(tf_blocks),
- desc="Search and Apply AWQ Scale, Clip range..",
- ):
- self._apply_awq_scale_clip_block(block, args, kwargs)
- gc.collect()
- torch.cuda.empty_cache()
-
- @torch.no_grad()
- def pre_quantize(
- self,
- model: torch.nn.Module,
- ) -> None:
- """Pre-procedure that should be called before quantize() is called."""
- model = cast(AWQHook, self.hook).add_pre_scaler(model)
- self._apply_awq_scale_clip(model)
-
- @torch.no_grad()
- def quantize(
- self,
- model: torch.nn.Module,
- ) -> torch.nn.Module:
- """Quantize model with AWQ."""
- model.eval()
- for quant_input in tqdm(
- self.hook.iter_tf_quant_inputs(model),
- total=len(self.hook.get_tf_blocks(model)),
- desc="Quantize model..",
- ):
- assert isinstance(quant_input, TFQuantInputs)
- quant_result = cast(AWQHook, self.hook).get_quant_result(
- quant_input, quant_config=cast(AWQConfig, self.quant_config)
- )
- for field in fields(quant_result):
- layer_quant_result = getattr(quant_result, field.name)
- if isinstance(layer_quant_result, WeightOnlyQuantResult):
- layer = model.get_submodule(layer_quant_result.module_name)
- q_layer = WeightOnlyQuantizedLinearLayer.from_layer(
- layer, layer_quant_result
- )
- quant_result.block.add_module(field.name, q_layer)
-
- return model
diff --git a/friendli/modules/quantizer/awq/models/gpt_neox.py b/friendli/modules/quantizer/awq/models/gpt_neox.py
deleted file mode 100644
index 8d48328a..00000000
--- a/friendli/modules/quantizer/awq/models/gpt_neox.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPTNeoXForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type
-
-import torch
-
-from friendli.enums import ModelDataType
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.quantizer.awq.base import AWQHook
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.utils import scale_reshape
-
-
-class AWQGPTNeoXHook(AWQHook):
- """AWQ Hook for GPTNeoXForCausalLM."""
-
- def __init__(self, quant_config, converter):
- """Initialize AWQGPTNeoXHook."""
- super().__init__(quant_config, converter)
- config = converter.config
- self.data_type = converter.data_type
- self.num_attention_heads = config.num_attention_heads
- self.num_kv_attention_heads = config.num_attention_heads
- self.hidden_size = config.hidden_size
- self.head_size = self.hidden_size // self.num_attention_heads
- self.rotary_dim = int(self.head_size * config.rotary_pct)
- assert config.use_parallel_residual == True
-
- def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module:
- """Adds scaler to GPTNeoXForCausalLM."""
- for tf_block in self.get_tf_blocks(model):
- attn_fc_scaler = self._register_pre_scaler(
- tf_block.attention.dense,
- )
- tf_block.attention.add_module("scaler", attn_fc_scaler)
- ff2_scaler = self._register_pre_scaler(tf_block.mlp.dense_4h_to_h)
- tf_block.mlp.add_module("scaler", ff2_scaler)
- return model
-
- def get_inspect_module_types(
- self, block: torch.nn.Module
- ) -> Tuple[Type[torch.nn.Module], ...]:
- """Returns the type of linear layer (etc. qkv, linear layer) in transformer block."""
- return (type(block.attention), type(block.mlp))
-
- def iter_inspect_modules(
- self,
- block: torch.nn.Module,
- ) -> Iterator[
- Tuple[
- List[torch.nn.Module],
- List[Tuple[ModuleName, torch.nn.Linear]],
- torch.nn.Module,
- ModuleName,
- ]
- ]:
- """Returns iterator of layers in modules."""
- # qkv proj
- yield (
- [block.input_layernorm],
- [("attention.query_key_value", block.attention.query_key_value)],
- block.attention,
- "attention",
- )
- # attn out proj
- yield (
- [block.attention.scaler],
- [("attention.dense", block.attention.dense)],
- block.attention.dense,
- "attention.dense",
- )
- # ff1
- yield (
- [block.post_attention_layernorm],
- [("mlp.dense_h_to_4h", block.mlp.dense_h_to_4h)],
- block.mlp,
- "mlp",
- )
- # ff2
- yield (
- [block.mlp.scaler],
- [("mlp.dense_4h_to_h", block.mlp.dense_4h_to_h)],
- block.mlp.dense_4h_to_h,
- "mlp.dense_4h_to_h",
- )
-
- def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of GPTNeoXForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- qkv_weight = self.converter.qkv_weight_reshape(
- [decoder_layer.attention.query_key_value.weight]
- ).transpose(
- 0, 1
- ) # [OutDim, InDim]
- attn_weight_outdim = qkv_weight.size(0) # OutDim
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attention.query_key_value",
- 0,
- attn_weight_outdim // 3,
- ),
- k=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attention.query_key_value",
- attn_weight_outdim // 3,
- attn_weight_outdim // 3 * 2,
- ),
- v=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attention.query_key_value",
- attn_weight_outdim // 3 * 2,
- attn_weight_outdim,
- ),
- attn_fc=QuantInput(
- decoder_layer.attention.dense.weight,
- f"{self.quantized_layer_prefix}{index}.attention.dense",
- None,
- None,
- ),
- ff1=QuantInput(
- decoder_layer.mlp.dense_h_to_4h.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h",
- None,
- None,
- ),
- ff2=QuantInput(
- decoder_layer.mlp.dense_4h_to_h.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in GPTNeoXForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in GPTNeoXForCausalLM."""
- return model.gpt_neox.layers # type: ignore
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified layers."""
- convert_info_list = []
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}attention.scaler.scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_proj/awq/pre_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.scaler.scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_proj/awq/pre_scale:0",
- reshape_fn=scale_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def avoid_clipping_layer_names(self) -> List[str]:
- """Returns the layer names which should be avoided for AWQ clipping."""
- return ["query_key_value"]
diff --git a/friendli/modules/quantizer/awq/models/gptj.py b/friendli/modules/quantizer/awq/models/gptj.py
deleted file mode 100644
index da2e81dc..00000000
--- a/friendli/modules/quantizer/awq/models/gptj.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPTJForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type
-
-import torch
-
-from friendli.enums import ModelDataType
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.quantizer.awq.base import AWQHook
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.utils import scale_reshape
-
-
-class AWQGPTJHook(AWQHook):
- """AWQ Hook for GPTJForCausalLM."""
-
- def __init__(self, quant_config, converter):
- """Initialize AWQGPTJHook."""
- super().__init__(quant_config, converter)
- config = converter.config
- self.data_type = converter.data_type
- self.num_attention_heads = config.num_attention_heads
- self.num_kv_attention_heads = config.num_attention_heads
- self.hidden_size = config.hidden_size
- self.head_size = self.hidden_size // self.num_attention_heads
- self.rotary_dim = config.rotary_dim
-
- def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module:
- """Adds scaler to GPTJForCausalLM."""
- for tf_block in self.get_tf_blocks(model):
- ff2_scaler = self._register_pre_scaler(tf_block.mlp.fc_out)
- tf_block.mlp.add_module("ff2_scaler", ff2_scaler)
- return model
-
- def get_inspect_module_types(
- self, block: torch.nn.Module
- ) -> Tuple[Type[torch.nn.Module], ...]:
- """Returns the type of linear layer (etc. qkv, linear layer) in transformer block."""
- return (type(block.attn), type(block.mlp), type(block))
-
- def iter_inspect_modules(
- self,
- block: torch.nn.Module,
- ) -> Iterator[
- Tuple[
- List[torch.nn.Module],
- List[Tuple[ModuleName, torch.nn.Linear]],
- torch.nn.Module,
- ModuleName,
- ]
- ]:
- """Returns iterator of layers in modules."""
- # qkv proj
- yield (
- [block.ln_1],
- [
- ("attn.q_proj", block.attn.q_proj),
- ("attn.k_proj", block.attn.k_proj),
- ("attn.v_proj", block.attn.v_proj),
- ("mlp.fc_in", block.mlp.fc_in),
- ],
- block,
- "",
- )
- # attn out proj
- yield (
- [block.attn.v_proj],
- [("attn.out_proj", block.attn.out_proj)],
- block.attn.out_proj,
- "attn.out_proj",
- )
- # ff2
- yield (
- [block.mlp.ff2_scaler],
- [("mlp.fc_out", block.mlp.fc_out)],
- block.mlp.fc_out,
- "mlp.fc_out",
- )
-
- def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of GPTJForCausalLM."""
- for index, tf_block in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- yield TFQuantInputs(
- layer_index=index,
- block=tf_block,
- q=QuantInput(
- tf_block.attn.q_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.q_proj",
- None,
- None,
- ),
- k=QuantInput(
- tf_block.attn.k_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.k_proj",
- None,
- None,
- ),
- v=QuantInput(
- tf_block.attn.v_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.v_proj",
- None,
- None,
- ),
- attn_fc=QuantInput(
- tf_block.attn.out_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- tf_block.mlp.fc_in.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.fc_in",
- None,
- None,
- ),
- ff2=QuantInput(
- tf_block.mlp.fc_out.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.fc_out",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in GPTJForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in GPTJForCausalLM."""
- return model.transformer.h # type: ignore
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified layers."""
- convert_info_list = []
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.append(
- ConvertInfo(
- param_names=[f"{layer_prefix}mlp.ff2_scaler.scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_proj/awq/pre_scale:0",
- reshape_fn=scale_reshape,
- )
- )
- return convert_info_list
-
- @property
- def avoid_clipping_layer_names(self) -> List[str]:
- """Returns the layer names which should be avoided for AWQ clipping."""
- return ["q_proj", "k_proj"]
diff --git a/friendli/modules/quantizer/awq/models/llama.py b/friendli/modules/quantizer/awq/models/llama.py
deleted file mode 100644
index f59bc0cf..00000000
--- a/friendli/modules/quantizer/awq/models/llama.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli LlamaForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Any, Iterator, List, Tuple, Type, cast
-
-import torch
-
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.quantizer.awq.base import AWQHook
-from friendli.modules.quantizer.schema.config import AWQConfig
-from friendli.modules.quantizer.schema.data import (
- ModuleName,
- QuantInput,
- TFQuantInputs,
- TFQuantResults,
- WeightOnlyQuantResult,
-)
-from friendli.modules.quantizer.utils import (
- get_weight_only_quant_scales,
- quantized_linear_weight_reshape,
- scale_reshape,
-)
-
-
-@dataclass
-class LlamaTFQuantInputs(TFQuantInputs):
- """Dataclass for quantization input per layer in LlamaForCausalLM."""
-
- ff_gate: QuantInput
-
-
-@dataclass
-class LlamaTFQuantResults(TFQuantResults):
- """Dataclass for quantization result per layer in LlamaForCausalLM."""
-
- ff_gate: WeightOnlyQuantResult
-
-
-class AWQLlamaHook(AWQHook):
- """AWQ Hook for LlamaForCausalLM."""
-
- def __init__(self, quant_config, converter):
- """Initialize AWQLlamaHook."""
- super().__init__(quant_config, converter)
- config = converter.config
- self.data_type = converter.data_type
- self.num_attention_heads = config.num_attention_heads
- if config.num_key_value_heads is None:
- self.num_kv_attention_heads = self.num_attention_heads
- else:
- self.num_kv_attention_heads = config.num_key_value_heads
- self.hidden_size = config.hidden_size
- self.head_size = self.hidden_size // self.num_attention_heads
- self.rotary_dim = self.head_size
- self.scale_attn_fc = self.num_attention_heads == self.num_kv_attention_heads
-
- def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module:
- """Adds scaler to LlamaForCausalLM."""
- return model
-
- def get_inspect_module_types(
- self, block: torch.nn.Module
- ) -> Tuple[type[torch.nn.Module], ...]:
- """Returns the layer types in inspected blocks."""
- return (type(block.self_attn), type(block.mlp))
-
- def iter_inspect_modules(
- self,
- block: torch.nn.Module,
- ) -> Iterator[
- Tuple[
- List[torch.nn.Module],
- List[Tuple[ModuleName, torch.nn.Linear]],
- torch.nn.Module,
- ModuleName,
- ]
- ]:
- """Returns iterator of layers in blocks."""
- # qkv proj
- yield (
- [block.input_layernorm],
- [
- ("self_attn.q_proj", block.self_attn.q_proj),
- ("self_attn.k_proj", block.self_attn.k_proj),
- ("self_attn.v_proj", block.self_attn.v_proj),
- ],
- block.self_attn,
- "self_attn",
- )
- # attn out proj
- if self.scale_attn_fc:
- yield (
- [block.self_attn.v_proj],
- [("self_attn.o_proj", block.self_attn.o_proj)],
- block.self_attn.o_proj,
- "self_attn.o_proj",
- )
- # ff1
- yield (
- [block.post_attention_layernorm],
- [
- ("mlp.up_proj", block.mlp.up_proj),
- ("mlp.gate_proj", block.mlp.gate_proj),
- ],
- block.mlp,
- "mlp",
- )
- # ff2
- yield (
- [block.mlp.up_proj],
- [("mlp.down_proj", block.mlp.down_proj)],
- block.mlp.down_proj,
- "mlp.down_proj",
- )
-
- def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of LlamaForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.self_attn
- q_weight, k_weight, v_weight = (
- self.converter.qkv_weight_reshape(
- [
- self_attn.q_proj.weight,
- self_attn.k_proj.weight,
- self_attn.v_proj.weight,
- ]
- )
- .transpose(0, 1)
- .split(
- [
- self.converter.decoder_num_attention_heads
- * self.converter.decoder_head_size,
- self.converter.decoder_num_kv_attention_heads
- * self.converter.decoder_head_size,
- self.converter.decoder_num_kv_attention_heads
- * self.converter.decoder_head_size,
- ],
- dim=0,
- )
- )
- fc1 = decoder_layer.mlp.up_proj
- ff_gate = decoder_layer.mlp.gate_proj
- fc2 = decoder_layer.mlp.down_proj
-
- yield LlamaTFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- q_weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj",
- None,
- None,
- ),
- k=QuantInput(
- k_weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.k_proj",
- None,
- None,
- ),
- v=QuantInput(
- v_weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.v_proj",
- None,
- None,
- ),
- attn_fc=QuantInput(
- self_attn.o_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.up_proj",
- None,
- None,
- ),
- ff_gate=QuantInput(
- ff_gate.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.gate_proj",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.down_proj",
- None,
- None,
- ),
- )
-
- def get_quant_result(
- self,
- quant_input: TFQuantInputs,
- **kwargs: Any,
- ) -> TFQuantResults:
- """Get quantization result for a specific layer in LlamaForCausalLM."""
- awq_config = cast(AWQConfig, self.quant_config)
-
- def get_scale(quant_input: QuantInput) -> WeightOnlyQuantResult:
- weight, name, start, end = (
- quant_input.weight,
- quant_input.name,
- quant_input.start_offset,
- quant_input.end_offset,
- )
- weight = weight.to(awq_config.device)
-
- return get_weight_only_quant_scales(
- layer_name=name,
- w=weight[start:end],
- q_bit=awq_config.awq_args.quant_bit,
- q_group_size=awq_config.awq_args.quant_group_size,
- )
-
- quant_input = cast(LlamaTFQuantInputs, quant_input)
- return LlamaTFQuantResults(
- layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_input.layer_index}.",
- block=quant_input.block,
- q=get_scale(quant_input.q),
- k=get_scale(quant_input.k),
- v=get_scale(quant_input.v),
- attn_fc=get_scale(quant_input.attn_fc),
- ff1=get_scale(quant_input.ff1),
- ff_gate=get_scale(quant_input.ff_gate),
- ff2=get_scale(quant_input.ff2),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in LlamaForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in LlamaForCausalLM."""
- return model.model.layers
-
- @property
- def quantized_param_names(self) -> List[str]:
- """Returns the parameter names in LlamaForCausalLM."""
- param_names = super().quantized_param_names
- for i in range(self.converter.decoder_layer_num):
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- param_names.append(
- f"{converted_prefix}mlp/c_gate/weight:0",
- )
- return param_names
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified layers."""
- return []
-
- @property
- def avoid_clipping_layer_names(self) -> List[str]:
- """Returns the layer names which should be avoided for AWQ clipping."""
- return ["q_proj", "k_proj"]
-
- @property
- def quantized_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the convert_info_list for quantized layers."""
- convert_info_list = super().quantized_convert_info_list
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
-
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ff_gate.weight_scale"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_gate/awq/scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff_gate.zeros"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}mlp/c_gate/awq/zero:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff_gate.weight"],
- data_type=self.quant_dtype,
- converted_name=f"{converted_prefix}mlp/c_gate/awq/weight:0",
- reshape_fn=quantized_linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
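Stripped of the converter helpers, the grouped-query split in `iter_tf_quant_inputs` above reduces to the following sketch with toy dimensions: the fused output dimension is `(num_heads + 2 * num_kv_heads) * head_size`, and the q/k/v slices are taken along dim 0.

```python
import torch

num_heads, num_kv_heads, head_size, hidden = 8, 2, 16, 128
q_out = num_heads * head_size      # 128 rows for the query projection
kv_out = num_kv_heads * head_size  # 32 rows each for key and value

fused = torch.randn(q_out + 2 * kv_out, hidden)  # [OutDim, InDim]
q_w, k_w, v_w = fused.split([q_out, kv_out, kv_out], dim=0)

print(q_w.shape, k_w.shape, v_w.shape)
# torch.Size([128, 128]) torch.Size([32, 128]) torch.Size([32, 128])
```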
diff --git a/friendli/modules/quantizer/awq/models/mpt.py b/friendli/modules/quantizer/awq/models/mpt.py
deleted file mode 100644
index 6c60ca58..00000000
--- a/friendli/modules/quantizer/awq/models/mpt.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli MPTForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type
-
-import torch
-
-from friendli.enums import ModelDataType
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.quantizer.awq.base import AWQHook
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.utils import scale_reshape
-
-
-class AWQMPTHook(AWQHook):
- """AWQ Hook for MPTForCausalLM."""
-
- def add_pre_scaler(self, model: torch.nn.Module) -> torch.nn.Module:
- """Adds scaler to MPTForCausalLM."""
- for tf_block in self.get_tf_blocks(model):
- attn_fc_scaler = self._register_pre_scaler(
- tf_block.attn.out_proj,
- )
- tf_block.attn.add_module("scaler", attn_fc_scaler)
- ff2_scaler = self._register_pre_scaler(tf_block.ffn.down_proj)
- tf_block.ffn.add_module("scaler", ff2_scaler)
- return model
-
- def get_inspect_module_types(
- self, block: torch.nn.Module
- ) -> Tuple[Type[torch.nn.Module], ...]:
- """Returns the type of linear layer (etc. qkv, linear layer) in transformer block."""
- return (type(block.attn), type(block.ffn))
-
- def iter_inspect_modules(
- self,
- block: torch.nn.Module,
- ) -> Iterator[
- Tuple[
- List[torch.nn.Module],
- List[Tuple[ModuleName, torch.nn.Linear]],
- torch.nn.Module,
- ModuleName,
- ]
- ]:
- """Returns iterator of layers in modules."""
- # qkv proj
- yield (
- [block.norm_1],
- [("attn.Wqkv", block.attn.Wqkv)],
- block.attn,
- "attn",
- )
- # attn out proj
- yield (
- [block.attn.scaler],
- [("attn.out_proj", block.attn.out_proj)],
- block.attn.out_proj,
- "attn.out_proj",
- )
- # ff1
- yield (
- [block.norm_2],
- [("ffn.up_proj", block.ffn.up_proj)],
- block.ffn,
- "ffn",
- )
- # ff2
- yield (
- [block.ffn.scaler],
- [("ffn.down_proj", block.ffn.down_proj)],
- block.ffn.down_proj,
- "ffn.down_proj",
- )
-
- def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of MPTForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.attn
- q_outdim = (
- self.converter.decoder_num_attention_heads
- * self.converter.decoder_head_size
- )
- kv_outdim = (
- self.converter.decoder_num_kv_attention_heads
- * self.converter.decoder_head_size
- )
- qkv_outdim = self_attn.Wqkv.weight.size(0)
- assert qkv_outdim == q_outdim + kv_outdim * 2
- fc1 = decoder_layer.ffn.up_proj # type: ignore
- fc2 = decoder_layer.ffn.down_proj # type: ignore
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- self_attn.Wqkv.weight, # type: ignore
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- 0,
- q_outdim,
- ),
- k=QuantInput(
- self_attn.Wqkv.weight, # type: ignore
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- q_outdim,
- q_outdim + kv_outdim,
- ),
- v=QuantInput(
- self_attn.Wqkv.weight, # type: ignore
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- q_outdim + kv_outdim,
- qkv_outdim,
- ),
- attn_fc=QuantInput(
- self_attn.out_proj.weight, # type: ignore
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight, # type: ignore
- f"{self.quantized_layer_prefix}{index}.ffn.up_proj",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight, # type: ignore
- f"{self.quantized_layer_prefix}{index}.ffn.down_proj",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in MPTForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in MPTForCausalLM."""
- return model.transformer.blocks # type: ignore
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified layers."""
- convert_info_list = []
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}attn.scaler.scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_proj/awq/pre_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ffn.scaler.scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_proj/awq/pre_scale:0",
- reshape_fn=scale_reshape,
- ),
- ]
- )
- return convert_info_list
-
- @property
- def avoid_clipping_layer_names(self) -> List[str]:
- """Returns the layer names which should be avoided for AWQ clipping."""
- return ["Wqkv"]
diff --git a/friendli/modules/quantizer/awq/utils.py b/friendli/modules/quantizer/awq/utils.py
deleted file mode 100644
index c6efdec4..00000000
--- a/friendli/modules/quantizer/awq/utils.py
+++ /dev/null
@@ -1,226 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-# Copyright (c) 2023 MIT HAN Lab
-# MIT License
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-"""Friendli AWQ Quantizer Util."""
-
-from __future__ import annotations
-
-import gc
-from typing import Any, Dict, Iterable, List, Tuple
-
-import torch
-
-
-def pseudo_quantize_tensor(w: torch.Tensor, q_bit: int = 8, q_group_size: int = -1):
- """Pseudo quantize tensor."""
- org_w_shape = w.shape
- w = w.reshape(-1, q_group_size)
- max_val = w.amax(dim=1, keepdim=True)
- min_val = w.amin(dim=1, keepdim=True)
- max_int = 2**q_bit - 1
- min_int = 0
- scales = (max_val - min_val).clamp(min=1e-5) / max_int
- zeros = (-torch.round(min_val / scales)).clamp_(min_int, max_int)
-
- assert torch.isnan(scales).sum() == 0
- assert torch.isnan(w).sum() == 0
-
- w = (
- torch.clamp(torch.round(w / scales) + zeros, min_int, max_int) - zeros
- ) * scales
- assert torch.isnan(w).sum() == 0
-
- w = w.reshape(org_w_shape)
-
- return w
-
-
-@torch.no_grad()
-def get_weight_scale(weight: torch.Tensor, q_group_size=-1):
- """Get weight scale for AWQ."""
- org_shape = weight.shape
- if q_group_size > 0:
- weight = weight.view(-1, q_group_size)
- scale = weight.abs() / weight.abs().amax(dim=1, keepdim=True)
- scale = scale.view(org_shape)
- scale = scale.mean(0)
- return scale
-
-
-@torch.no_grad()
-def get_act_scale(x):
- """Get activation scale for AWQ."""
- return x.abs().view(-1, x.shape[-1]).mean(0)
-
-
-def search_module_scale(
- module: torch.nn.Module,
- module_args: Tuple[Any, ...],
- module_kwargs: Dict[str, Any],
- linears2scale: Iterable[torch.nn.Linear],
- linear_inp: torch.Tensor,
- q_group_size: int,
- q_bit: int,
-) -> torch.Tensor:
- """Search the AWQ scale for a module."""
- # pylint: disable=too-many-locals
- weight = torch.cat([_m.weight for _m in linears2scale], dim=0) # type: ignore
- with torch.no_grad():
- org_out = module(*module_args, **module_kwargs)
- if isinstance(org_out, tuple):
- org_out = org_out[0]
-
- x_max = get_act_scale(linear_inp)
- w_max = get_weight_scale(weight, q_group_size)
- del weight
- gc.collect() # type: ignore
- torch.cuda.empty_cache()
-
- best_error = float("inf")
- best_scales = torch.zeros(x_max.shape[0], device=x_max.device)
- n_grid = 20
- history = []
- org_sd = {k: v.to("cpu", copy=True) for k, v in module.state_dict().items()}
- for grid in range(n_grid):
- ratio = grid * 1.0 / n_grid
- scales = (x_max.pow(ratio) / w_max.pow(1 - ratio)).clamp(min=1e-4).view(-1)
- scales = scales / (scales.max() * scales.min()).sqrt()
- for fc in linears2scale:
- fc.weight.mul_(scales.view(1, -1).to(fc.weight.device)) # type: ignore
- fc.weight.data = pseudo_quantize_tensor(
- w=fc.weight.data, # type: ignore
- q_bit=q_bit,
- q_group_size=q_group_size,
- ) / (scales.view(1, -1))
-
- out = module(*module_args, **module_kwargs)
- if isinstance(out, tuple):
- out = out[0]
-
- loss = (org_out - out).float().pow(2).mean().item() # float prevents overflow
- history.append(loss)
- is_best = loss < best_error
- if is_best:
- best_error = loss
- best_scales = scales
- module.load_state_dict(org_sd)
- best_scales = best_scales.view(-1)
-
- assert torch.isnan(best_scales).sum() == 0, best_scales
- return best_scales.detach()
-
-
-def apply_module_scale(
- prev_ops: List[torch.nn.Module],
- linear_layers: Iterable[torch.nn.Linear],
- scales: torch.Tensor,
-) -> None:
- """Apply AWQ Scale for Module, and return the scaled input for Clipping."""
- for prev_op in prev_ops:
- for _, param in prev_op.named_parameters(recurse=False):
- if isinstance(prev_op, torch.nn.Linear):
- # TODO: handle bias
- assert len(param.data.shape) == 2
- param.data.div_(scales.view(-1, 1))
- else:
- assert param.data.shape == scales.shape
- param.data.div_(scales)
-
- for layer in linear_layers:
- layer.weight.data.mul_(scales.view(1, -1))
-
-
-def search_module_clip(
- w: torch.Tensor,
- inp: torch.Tensor,
- q_group_size: int,
- q_bit: int,
- n_grid=20,
- max_shrink=0.5,
- n_sample_token=512,
-) -> torch.Tensor:
- """Search the best clip for a module."""
- # pylint: disable=too-many-locals
- # w [co, ci] -> [co, 1, n_group, group size]
- # inp [n_token, ci] -> [1, n_token, n_group, group size]
- w = w.view(w.shape[0], 1, -1, q_group_size)
-
- inp = inp.view(-1, inp.shape[-1])
- inp = inp.reshape(1, inp.shape[0], -1, q_group_size)
- inp = inp[:, 0 :: inp.shape[1] // n_sample_token]
-
- oc_batch_size = 256 if w.shape[0] % 256 == 0 else 64 # prevent OOM
- assert w.shape[0] % oc_batch_size == 0
- w_all = w
- best_max_val_all = []
-
- for i_b in range(w.shape[0] // oc_batch_size):
- w = w_all[i_b * oc_batch_size : (i_b + 1) * oc_batch_size]
-
- org_max_val = w.abs().amax(dim=-1, keepdim=True) # co, 1, n_group, 1
-
- best_max_val = org_max_val.clone()
- min_errs = torch.ones_like(org_max_val) * 1e9
- inp = inp.to(w.device)
- org_out = (inp * w).sum(dim=-1) # co, n_token, n_group
-
- for i_s in range(int(max_shrink * n_grid)):
- max_val = org_max_val * (1 - i_s / n_grid)
- min_val = -max_val
- cur_w = torch.clamp(w, min_val, max_val)
- q_w = pseudo_quantize_tensor(
- w=cur_w,
- q_bit=q_bit,
- q_group_size=q_group_size,
- )
- cur_out = (inp * q_w).sum(dim=-1)
-
- # co, 1, n_group, 1
- err = (cur_out - org_out).pow(2).mean(dim=1).view(min_errs.shape)
- del cur_w
- del cur_out
- cur_best_idx = err < min_errs
- min_errs[cur_best_idx] = err[cur_best_idx]
- best_max_val[cur_best_idx] = max_val[cur_best_idx]
- best_max_val_all.append(best_max_val)
-
- best_max_val = torch.cat(best_max_val_all, dim=0)
-
- del inp
- del org_out
- gc.collect()
- torch.cuda.empty_cache()
-
- return best_max_val.squeeze(1)
-
-
-def apply_module_clip(
- max_val: torch.Tensor,
- layer: torch.nn.Linear,
-):
- """Apply AWQ Clip for Module."""
- max_val = max_val.to(layer.weight.device) # type: ignore
- org_shape = layer.weight.shape
- layer.weight.data = layer.weight.data.reshape(*max_val.shape[:2], -1) # type: ignore
- layer.weight.data = torch.clamp(layer.weight.data, -max_val, max_val)
- layer.weight.data = layer.weight.data.reshape(org_shape) # type: ignore
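To make the fake-quantization round trip above concrete, a self-contained sketch that re-implements the same group-wise asymmetric math on a toy tensor; only `torch` is assumed.

```python
import torch


def fake_quant(w: torch.Tensor, q_bit: int = 4, q_group_size: int = 4) -> torch.Tensor:
    """Group-wise asymmetric round-trip quantization (same math as pseudo_quantize_tensor)."""
    shape = w.shape
    w = w.reshape(-1, q_group_size)
    max_val, min_val = w.amax(dim=1, keepdim=True), w.amin(dim=1, keepdim=True)
    max_int = 2**q_bit - 1
    scales = (max_val - min_val).clamp(min=1e-5) / max_int
    zeros = (-torch.round(min_val / scales)).clamp_(0, max_int)
    w_q = (torch.clamp(torch.round(w / scales) + zeros, 0, max_int) - zeros) * scales
    return w_q.reshape(shape)


torch.manual_seed(0)
w = torch.randn(2, 8)
err = (w - fake_quant(w)).abs().max()
print(err)  # small; bounded by half a quantization step within each group
```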
diff --git a/friendli/modules/quantizer/base.py b/friendli/modules/quantizer/base.py
deleted file mode 100644
index ea97e092..00000000
--- a/friendli/modules/quantizer/base.py
+++ /dev/null
@@ -1,507 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Quantizer Base."""
-
-from __future__ import annotations
-
-import os
-from abc import ABC, abstractmethod
-from collections.abc import Generator
-from contextlib import contextmanager
-from typing import Any, Dict, Iterator, List, Tuple, Type, Union, cast
-
-import datasets # type: ignore[import]
-import huggingface_hub # type: ignore[import]
-import numpy as np
-import torch
-from torch.nn.modules import Module
-from tqdm import tqdm
-
-from friendli.enums import (
- QuantDatasetFormat, # TODO: move this to friendli/modules/converter/enums.py
-)
-from friendli.enums import ModelDataType
-from friendli.errors import NotSupportedQuantConfigError
-from friendli.logging import logger
-from friendli.modules.converter.base import DECODER_PREFIX, OneOfConverter
-from friendli.modules.converter.interface import ModelConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import get_tokenizer, get_torch_data_type
-from friendli.modules.quantizer.layers import WeightActQuantizedLinearLayer
-from friendli.modules.quantizer.schema.config import OneOfQuantConfig
-from friendli.modules.quantizer.schema.data import (
- HFTFQuantInputs,
- ModuleName,
- TFQuantInputs,
- TFQuantResults,
- WeightActQuantResult,
-)
-from friendli.modules.quantizer.utils import (
- collect_stats,
- offload_module_sequence,
- safe_load_datasets,
- send_model_to_device,
-)
-
-
-class AbstractQuantHook(ABC):
- """Quantization Hook for a specific model architecture."""
-
- def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter):
- """Initialize the Quantization Hook.
-
- Args:
- quant_config: Quantization configuration.
- converter (OneOfConverter): Converter for a specific model architecture.
- """
- self.quant_config = quant_config
- self.converter = converter
-
- @abstractmethod
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks."""
-
- @abstractmethod
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the type of linear layer (etc. qkv, linear layer) in transformer block."""
-
- @abstractmethod
- def iter_tf_quant_inputs(
- self, model: torch.nn.Module
- ) -> Union[Iterator[TFQuantInputs], Iterator[HFTFQuantInputs]]:
- """Returns the layers which should be quantized in transformer blocks."""
-
- @abstractmethod
- def get_quant_result(
- self,
- quant_inputs: TFQuantInputs,
- **kwargs: Any,
- ) -> TFQuantResults:
- """Returns the quantization result of the layer."""
-
- @property
- @abstractmethod
- def quantized_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for quantized layers."""
-
- @property
- @abstractmethod
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified layers."""
-
- @property
- def quantized_layer_prefix(self) -> str:
- """Returns the prefix of the transformer block name."""
- return self.converter.decoder_layer_prefix
-
- @property
- def quantized_param_names(self) -> List[str]:
- """Return the parameter names of quantized layers."""
- param_names = []
- for i in range(self.converter.decoder_layer_num):
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- param_names.append(f"{converted_prefix}attn/c_attn/weight:0")
- param_names.append(f"{converted_prefix}attn/c_proj/weight:0")
- param_names.append(f"{converted_prefix}mlp/c_fc/weight:0")
- param_names.append(f"{converted_prefix}mlp/c_proj/weight:0")
-
- return param_names
-
-
-class AbstractQuantizer(ABC):
- """Abstract Quantizer for a specific model architecture."""
-
- def __init__(
- self,
- hook: AbstractQuantHook,
- config: OneOfQuantConfig,
- converter: OneOfConverter,
- ):
- """Initialize the Quantizer.
-
- Args:
-            hook (AbstractQuantHook): Quantization Hook for a specific model architecture.
-            config (OneOfQuantConfig): Quantization configuration.
- converter (OneOfConverter): Converter for a specific model architecture.
-
- """
- self.hook = hook
- self.quant_config = config
- self.converter = converter
-
- @abstractmethod
- def get_calib_dataset(
- self,
- ) -> datasets.Dataset:
- """Get calibration dataset."""
-
- @abstractmethod
- def pre_quantize(
- self,
- model: torch.nn.Module,
- ) -> None:
- """Pre-procedure that should be called before quantize() is called."""
-
- @abstractmethod
- def quantize(
- self,
- model: torch.nn.Module,
- ) -> torch.nn.Module:
- """Setting Quantizer from config and Quantize model."""
-
-
-class CommonQuantizer(AbstractQuantizer, ModelConversionInterface):
- """Common Quantizer."""
-
- def check_config(self) -> None:
- """Check if the quantization config is valid."""
- self.converter.check_config()
- calibration_dataset_config = self.quant_config.calibration_dataset
- data_path_or_name = calibration_dataset_config.path_or_name
- percentile = self.quant_config.percentile
- if percentile <= 0 or percentile > 100:
- raise NotSupportedQuantConfigError(
- invalid_option=str(percentile),
- valid_options=["0 < percentile <= 100"],
- )
- if not os.path.exists(data_path_or_name):
- data_name = data_path_or_name.split(":")[0]
- if data_name not in (
- data.id for data in huggingface_hub.list_datasets(search=data_name)
- ):
- raise NotSupportedQuantConfigError(
- invalid_option=data_name,
- valid_options=["datasets on the huggingface hub", "local path"],
- )
- else:
- if calibration_dataset_config.format not in QuantDatasetFormat:
- raise NotSupportedQuantConfigError(
- invalid_option=calibration_dataset_config.format,
- valid_options=list(QuantDatasetFormat),
- )
- try:
- torch.device(self.quant_config.device)
- except ValueError as err:
- raise NotSupportedQuantConfigError(
- invalid_option=self.quant_config.device,
- valid_options=["cpu", "cuda"],
- ) from err
-
- def get_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Get List of the convert informations for the model."""
- convert_info_list = self.converter.get_convert_info_list()
- new_convert_info_list = []
- for convert_info in convert_info_list:
- if convert_info.converted_name in self.hook.quantized_param_names:
- continue
- new_convert_info_list.append(convert_info)
-
- return (
- new_convert_info_list
- + self.hook.quantized_convert_info_list
- + self.hook.modified_layers_convert_info_list
- )
-
- def get_attributes(self) -> Dict[str, Any]:
- """Return the attributes of the converted model."""
- return self.converter.get_attributes()
-
- @contextmanager
- def _try_offload_model(self, model: torch.nn.Module):
- if not self.quant_config.offload:
- logger.info("Offloading not enabled. Skipping.")
- model.to(self.quant_config.device)
- yield
- else:
- logger.info("Offloading enabled.")
- tf_blocks = self.hook.get_tf_blocks(model)
- send_model_to_device(model, self.quant_config.device, exclude=tf_blocks)
- with offload_module_sequence(tf_blocks, self.quant_config.device):
- yield
-
- def convert(
- self,
- model: torch.nn.Module,
- convert_info_list: List[ConvertInfo],
- save_numpy_format: bool = True,
- ) -> Generator[Tuple[str, Union[np.ndarray, torch.Tensor]], None, None]:
- """Convert Huggingface Model to Friendli format(.h5).
-
- Args:
- model (torch.nn.Module): Huggingface model.
- state_dict (Dict[str, torch.Tensor]):
- Dictionary of mapping of tensor name to tensor
- convert_info_list (List[ConvertInfo]):
- Dictionary of mapping converted params name to conversion functions.
- save_numpy_format (bool, optional): Save the converted tensor in numpy format.
- Defaults to True.
- """
- self.pre_quantize(model)
- model = self.quantize(model)
- yield from self.converter.convert(model, convert_info_list, save_numpy_format)
-
-
-class FP8QuantHook(AbstractQuantHook):
- """Quantization Hook for FP8Quantizer."""
-
- def pre_quantize(self, model: Module) -> torch.nn.Module: # type: ignore[]
- """Pre-procedure that should be called before quantize() is called in FP8Quantizer."""
- return model
-
- def post_quantize(self, model: Module) -> torch.nn.Module:
- """Post-procedure that should be called after quantize() is called in FP8Quantizer."""
- return model
-
- def get_quant_result(
- self, quant_inputs: TFQuantInputs, **kwargs: Any
- ) -> TFQuantResults:
- """Returns the quantization result of the layer."""
- raise NotImplementedError
-
- def get_quantized_param_names(self, model: torch.nn.Module) -> List[str]:
- """Return the parameter names of quantized layers."""
- quantized_param_names = []
- for tf_quant_input in self.iter_tf_quant_inputs(model):
- assert isinstance(tf_quant_input, HFTFQuantInputs)
- for quant_input in tf_quant_input.quant_inputs:
- for target_name in quant_input.target_names:
- quantized_param_names.append(f"{target_name}.weight")
- return quantized_param_names
-
- def get_quantized_param_scale_names(self, model):
- """Return the parameter scale names of quantized layers."""
- quantized_param_scale_names = []
- for tf_quant_input in self.iter_tf_quant_inputs(model):
- assert isinstance(tf_quant_input, HFTFQuantInputs)
- for quant_input in tf_quant_input.quant_inputs:
- for target_name in quant_input.target_names:
- quantized_param_scale_names.append(f"{target_name}.weight_scale")
- quantized_param_scale_names.append(f"{target_name}.in_scale")
- return quantized_param_scale_names
-
- @property
- def quantized_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for quantized layers."""
- raise NotImplementedError
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified layers."""
- raise NotImplementedError
-
-
-class FP8Quantizer(CommonQuantizer):
- """FP8Quantizer for huggingface format.
-
-    This quantizer supports per-tensor weight-activation quantization using a
-    calibration dataset. It adds quantization scales and quantized parameters
-    to the checkpoint, while preserving the parameter shapes and names of the
-    Hugging Face checkpoint.
- """
-
- def get_calib_dataset(self) -> datasets.Dataset:
- """Get calibration dataset."""
- data_cfg = self.quant_config.calibration_dataset
- tokenizer = get_tokenizer(self.converter.config.name_or_path)
- dataset = safe_load_datasets(data_cfg)
-
- dataset = (
- dataset.shuffle(self.quant_config.seed)
- .select(range(data_cfg.num_samples))
- .select_columns([data_cfg.lookup_column_name])
- )
-
- encoded_dataset = tokenizer(
- dataset[data_cfg.lookup_column_name],
- return_tensors="pt",
- padding=True,
- truncation=True,
- max_length=data_cfg.max_length,
- )
- return encoded_dataset["input_ids"]
-
- def get_convert_info_list(self) -> List[ConvertInfo]:
- """Not used in FP8Quantizer."""
- return []
-
- def pre_quantize(self, model: Module) -> None:
- """Not used in FP8Quantizer."""
- return None
-
- def _get_weight_act_quantize_results(
- self,
- model: torch.nn.Module,
- names: List[ModuleName],
- max_input_stats: Dict[ModuleName, torch.Tensor],
- ) -> List[WeightActQuantResult]:
- """Get the quantization scales and quantized_weight for a specific layer."""
- assert (
- self.quant_config.quant_dtype == ModelDataType.FP8_E4M3
- ), "currently support fp8_e4m3"
- max_val = 448.0
- min_val = -448.0
- input_max = None
- for name in names:
- input_max = max_input_stats.get(name)
- if input_max is not None:
- break
- assert input_max is not None
- target_weights = [model.get_submodule(name).weight for name in names]
- target_weight = torch.concat(target_weights)
-
- act_scale = float(input_max.detach().abs().max().item()) / float(max_val)
- weight_scale = float(target_weight.detach().abs().max().item()) / float(max_val)
-
- q_weights = [
- (
- (weight.detach().float() / weight_scale)
- .clip(min_val, max_val)
- .to(torch.float8_e4m3fn)
- .view(torch.int8)
- .to("cpu")
- )
- for weight in target_weights
- ]
- return [
- WeightActQuantResult(
- name,
- quant_dtype=self.quant_config.quant_dtype,
- act_scale=torch.tensor(act_scale, dtype=torch.float32),
- weight_scale=torch.tensor(weight_scale, dtype=torch.float32),
- q_weight=q_weight,
- q_group_size=-1,
- zero_point=torch.tensor(0.0),
- )
- for name, q_weight in zip(names, q_weights)
- ]
-
- @torch.no_grad()
- def quantize(
- self,
- model: torch.nn.Module,
- ) -> torch.nn.Module:
- """Quantize model to lower data type. Currently supports FP8."""
- # pylint: disable=too-many-locals
- dataset = self.get_calib_dataset()
- model.eval()
- with self._try_offload_model(model):
- max_input_stats, _ = collect_stats(
- model,
- self.quant_config.device,
- dataset,
- cast(FP8QuantHook, self.hook).get_linear_layer_types(),
- percentile=self.quant_config.percentile,
- tqdm_desc="Collecting stats for Static Quantization.",
- batch_size=32,
- )
- for tf_quant_input in tqdm(
- self.hook.iter_tf_quant_inputs(model),
- total=len(self.hook.get_tf_blocks(model)),
- desc="Quantize",
- unit="layer",
- ):
- assert isinstance(tf_quant_input, HFTFQuantInputs)
- for quant_input in tf_quant_input.quant_inputs:
- parent_module, local_names, names = (
- quant_input.parent_module,
- quant_input.local_names,
- quant_input.target_names,
- )
-
- if isinstance(parent_module, torch.nn.ModuleList):
-                        # For MoE models with separate expert layers
- parent_modules_w_local_name = []
- for p_module in parent_module:
- for local_name in local_names:
- parent_modules_w_local_name.append(
- (p_module, local_name)
- )
-
- layers = [
- p_module.get_submodule(local_name)
- for p_module, local_name in parent_modules_w_local_name
- ]
-
- quant_results = self._get_weight_act_quantize_results(
- model,
- names,
- max_input_stats,
- )
- q_layers = [
- WeightActQuantizedLinearLayer.from_layer(
- layer, quant_result
- )
- for layer, quant_result in zip(layers, quant_results)
- ]
- for (p_module, local_name), q_layer in zip(
- parent_modules_w_local_name, q_layers
- ):
- setattr(p_module, local_name, q_layer)
-
- else:
- layers = [
- parent_module.get_submodule(local_name)
- for local_name in local_names
- ]
- quant_results = self._get_weight_act_quantize_results(
- model,
- names,
- max_input_stats,
- )
- q_layers = [
- WeightActQuantizedLinearLayer.from_layer(
- layer, quant_result
- )
- for layer, quant_result in zip(layers, quant_results)
- ]
- for local_name, q_layer in zip(local_names, q_layers):
- setattr(parent_module, local_name, q_layer)
-
- return model
-
- def convert( # type: ignore[override]
- self,
- model: torch.nn.Module,
- convert_info_list: List[ConvertInfo],
- save_numpy_format: bool = False,
- ) -> Generator[Tuple[str, Union[torch.Tensor, np.ndarray]], None, None]:
- """Convert Huggingface Model to Friendli format(.h5).
-
- Args:
- model (torch.nn.Module): Huggingface model.
- state_dict (Dict[str, torch.Tensor]):
- Dictionary of mapping of tensor name to tensor
- convert_info_list (List[ConvertInfo]):
- Dictionary of mapping converted params name to conversion functions.
- It will be depreciated.
- save_numpy_format (bool, optional): Save the converted tensor in numpy format.
- It will be depreciated.
- """
- model = cast(FP8QuantHook, self.hook).pre_quantize(model)
- model = self.quantize(model)
- model = cast(FP8QuantHook, self.hook).post_quantize(model)
- state_dict: Dict[str, torch.Tensor] = model.state_dict()
-
- quantized_param_names = cast(FP8QuantHook, self.hook).get_quantized_param_names(
- model
- )
- quantized_param_names.extend(
- cast(FP8QuantHook, self.hook).get_quantized_param_scale_names(model)
- )
-
- with tqdm(total=len(state_dict), desc="Converting", unit="tensor") as pbar:
- for param_name, param in state_dict.items():
- if param_name not in quantized_param_names:
- param = param.to(get_torch_data_type(self.converter.data_type))
- yield param_name, param
- pbar.update()
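The per-tensor FP8 quantization above boils down to one scale-and-cast per tensor. The sketch below illustrates that math in isolation; it assumes a PyTorch build with `torch.float8_e4m3fn` and is an illustration rather than code from the deleted module.

import torch

# Per-tensor FP8 (E4M3) quantization sketch, mirroring the logic above.
FP8_E4M3_MAX = 448.0

weight = torch.randn(4096, 4096)      # hypothetical linear weight
input_max = torch.rand(4096) * 10.0   # hypothetical per-channel activation maxima

# One scale per tensor: map the largest magnitude onto the FP8 E4M3 maximum.
weight_scale = weight.abs().max().item() / FP8_E4M3_MAX
act_scale = input_max.abs().max().item() / FP8_E4M3_MAX

# Scale, clip to the representable range, and cast to float8_e4m3fn.
q_weight = (
    (weight.float() / weight_scale)
    .clip(-FP8_E4M3_MAX, FP8_E4M3_MAX)
    .to(torch.float8_e4m3fn)
)

# Dequantization is approximately q_weight.float() * weight_scale.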
diff --git a/friendli/modules/quantizer/layers.py b/friendli/modules/quantizer/layers.py
deleted file mode 100644
index 31d104b1..00000000
--- a/friendli/modules/quantizer/layers.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Quantization Layers."""
-
-from __future__ import annotations
-
-from typing import Optional, cast
-
-import torch
-
-from friendli.modules.quantizer.schema.data import (
- CommonQuantResult,
- WeightActQuantResult,
- WeightOnlyQuantResult,
-)
-
-
-class WeightOnlyQuantizedLinearLayer(torch.nn.Module):
- """Linear Layer with weight only quantization."""
-
- def __init__(
- self,
- in_features: int,
- out_features: int,
- q_weight: torch.Tensor,
- weight_scale: torch.Tensor,
- zeros: torch.Tensor,
- bias: Optional[torch.nn.Parameter] = None,
- ):
- """Initialize the Weight Only Quantized Linear Layer."""
- super().__init__()
- self.in_features = in_features
- self.out_features = out_features
- self.weight_scale = torch.nn.Parameter(weight_scale)
- self.zeros = torch.nn.Parameter(zeros, requires_grad=False)
- self.weight = torch.nn.Parameter(q_weight, requires_grad=False)
- self.register_parameter("bias", bias)
-
- @staticmethod
- def from_layer(
- layer: torch.nn.Module, quant_result: CommonQuantResult
- ) -> torch.nn.Module:
- """Returns the quantized layer from the original layer."""
- q_result = cast(WeightOnlyQuantResult, quant_result)
- return WeightOnlyQuantizedLinearLayer(
- cast(torch.nn.Linear, layer).in_features,
- cast(torch.nn.Linear, layer).out_features,
- q_result.q_weight,
- q_result.weight_scale,
- q_result.zero_point,
- cast(torch.nn.Linear, layer).bias,
- )
-
- def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Forward pass with fake quantization. Not used in conversion."""
- raise NotImplementedError("Not used in conversion.")
-
-
-class WeightActQuantizedLinearLayer(torch.nn.Module):
- """Linear Layer with weight-act quantization."""
-
- def __init__( # pylint: disable=too-many-arguments
- self,
- q_weight: torch.Tensor,
- weight_scale: torch.Tensor,
- act_scale: torch.Tensor,
- bias: Optional[torch.nn.Parameter] = None,
- ):
- """Initialize the Weight Only Quantized Linear Layer."""
- super().__init__()
- self.in_scale = torch.nn.Parameter(act_scale)
- self.weight_scale = torch.nn.Parameter(weight_scale)
- self.weight = torch.nn.Parameter(q_weight, requires_grad=False)
- self.register_parameter("bias", bias)
-
- @staticmethod
- def from_layer(
- layer: torch.nn.Module, quant_result: CommonQuantResult
- ) -> torch.nn.Module:
- """Returns the quantized layer from the original layer."""
- q_result = cast(WeightActQuantResult, quant_result)
- return WeightActQuantizedLinearLayer(
- q_result.q_weight,
- q_result.weight_scale,
- q_result.act_scale,
- cast(torch.nn.Linear, layer).bias if hasattr(layer, "bias") else None,
- )
-
- def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Forward pass with fake quantization. Not used in conversion."""
- raise NotImplementedError("Not used in conversion.")
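A rough usage sketch for the wrapper above: given a quantized weight and scales (here filled with placeholder values, not a real quantization result), a plain `torch.nn.Linear` can be replaced by `WeightActQuantizedLinearLayer` so that `state_dict()` exposes the `weight`, `weight_scale`, and `in_scale` entries the FP8 conversion path looks for.

import torch

linear = torch.nn.Linear(16, 16, bias=False)

# Dummy FP8 weight and unit scales, for illustration only.
q_weight = linear.weight.detach().to(torch.float8_e4m3fn).view(torch.int8)
q_layer = WeightActQuantizedLinearLayer(
    q_weight=q_weight,
    weight_scale=torch.tensor(1.0),
    act_scale=torch.tensor(1.0),
    bias=None,
)
print(sorted(q_layer.state_dict().keys()))  # ['in_scale', 'weight', 'weight_scale']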
diff --git a/friendli/modules/quantizer/maps.py b/friendli/modules/quantizer/maps.py
deleted file mode 100644
index 465d5c3e..00000000
--- a/friendli/modules/quantizer/maps.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Quantizer Maps."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, Type
-
-from friendli.enums import QuantMode
-from friendli.errors import NotSupportedQuantModeError
-from friendli.modules.converter.base import OneOfConverter
-from friendli.modules.converter.utils import get_model_arch
-from friendli.modules.quantizer.awq.base import AWQHook, AWQQuantizer
-from friendli.modules.quantizer.awq.models.gpt_neox import AWQGPTNeoXHook
-from friendli.modules.quantizer.awq.models.gptj import AWQGPTJHook
-from friendli.modules.quantizer.awq.models.llama import AWQLlamaHook
-from friendli.modules.quantizer.awq.models.mpt import AWQMPTHook
-from friendli.modules.quantizer.base import CommonQuantizer, FP8QuantHook, FP8Quantizer
-from friendli.modules.quantizer.models.arctic import ArcticHook
-from friendli.modules.quantizer.models.dbrx import DbrxHook
-from friendli.modules.quantizer.models.llama import LlamaHook
-from friendli.modules.quantizer.models.mixtral import MixtralHook
-from friendli.modules.quantizer.models.mpt import MPTHook
-from friendli.modules.quantizer.models.phi3 import Phi3Hook
-from friendli.modules.quantizer.schema.config import OneOfQuantConfig
-from friendli.modules.quantizer.smoothquant.base import (
- SmoothQuantHook,
- SmoothQuantQuantizer,
-)
-from friendli.modules.quantizer.smoothquant.models.bloom import SmoothQuantBloomHook
-from friendli.modules.quantizer.smoothquant.models.codegen import SmoothQuantCodeGenHook
-from friendli.modules.quantizer.smoothquant.models.falcon import SmoothQuantFalconHook
-from friendli.modules.quantizer.smoothquant.models.gpt2 import SmoothQuantGPT2Hook
-from friendli.modules.quantizer.smoothquant.models.gpt_neox import (
- SmoothQuantGPTNeoXHook,
-)
-from friendli.modules.quantizer.smoothquant.models.gptj import SmoothQuantGPTJHook
-from friendli.modules.quantizer.smoothquant.models.llama import SmoothQuantLlamaHook
-from friendli.modules.quantizer.smoothquant.models.mpt import SmoothQuantMPTHook
-from friendli.modules.quantizer.smoothquant.models.opt import SmoothQuantOPTHook
-
-model_arch_smoothquant_hook_map: Dict[str, type[SmoothQuantHook]] = {
- "OPTForCausalLM": SmoothQuantOPTHook,
- "MPTForCausalLM": SmoothQuantMPTHook,
- "BloomForCausalLM": SmoothQuantBloomHook,
- "CodeGenForCausalLM": SmoothQuantCodeGenHook,
- "GPTNeoXForCausalLM": SmoothQuantGPTNeoXHook,
- "GPTJForCausalLM": SmoothQuantGPTJHook,
- "GPT2LMHeadModel": SmoothQuantGPT2Hook,
- "FalconForCausalLM": SmoothQuantFalconHook,
- "LlamaForCausalLM": SmoothQuantLlamaHook,
-}
-
-model_arch_awq_hook_map: Dict[str, type[AWQHook]] = {
- "GPTJForCausalLM": AWQGPTJHook,
- "GPTNeoXForCausalLM": AWQGPTNeoXHook,
- "LlamaForCausalLM": AWQLlamaHook,
- "MPTForCausalLM": AWQMPTHook,
- "MistralForCausalLM": AWQLlamaHook,
-}
-
-model_arch_fp8_hook_map: Dict[str, type[FP8QuantHook]] = {
- "LlamaForCausalLM": LlamaHook,
- "MistralForCausalLM": LlamaHook,
- "MixtralForCausalLM": MixtralHook,
- "MPTForCausalLM": MPTHook,
- "CohereForCausalLM": LlamaHook,
- "DbrxForCausalLM": DbrxHook,
- "Phi3ForCausalLM": Phi3Hook,
- "ArcticForCausalLM": ArcticHook,
-}
-
-
-def get_quanthook_map(quant_mode: QuantMode) -> Dict[str, Any]:
- """Get quantizer map."""
- if quant_mode == QuantMode.SMOOTH_QUANT:
- return model_arch_smoothquant_hook_map
- if quant_mode == QuantMode.AWQ:
- return model_arch_awq_hook_map
- if quant_mode == QuantMode.FP8:
- return model_arch_fp8_hook_map
- raise NotSupportedQuantModeError(
- invalid_option=quant_mode,
- valid_options=[e.value for e in QuantMode],
- )
-
-
-def get_quantizer_class(quant_mode: QuantMode) -> Type[CommonQuantizer]:
- """Get quantizer class."""
- if quant_mode == QuantMode.SMOOTH_QUANT:
- return SmoothQuantQuantizer
- if quant_mode == QuantMode.AWQ:
- return AWQQuantizer
- if quant_mode == QuantMode.FP8:
- return FP8Quantizer
- raise NotSupportedQuantModeError(
- invalid_option=quant_mode,
- valid_options=[e.value for e in QuantMode],
- )
-
-
-def get_quantized_converter(
- quant_config: OneOfQuantConfig,
- converter: OneOfConverter,
-) -> CommonQuantizer:
- """Get quantizer for specific model architecture with quant mode and args."""
- model_arch = get_model_arch(converter.config)
- quant_mode = quant_config.mode
- quantizer = get_quantizer_class(quant_mode)
- quanthook_map = get_quanthook_map(quant_mode)
- quanthook = quanthook_map[model_arch](quant_config, converter)
- return quantizer(quanthook, quant_config, converter)
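A hedged sketch of how this dispatch might be driven end to end; `model` and `converter` are assumed to be a loaded Hugging Face model and an already-constructed converter for it, neither of which is shown here.

from friendli.modules.quantizer.schema.config import FP8QuantConfig

quant_config = FP8QuantConfig()  # mode defaults to QuantMode.FP8
quantizer = get_quantized_converter(quant_config, converter)
quantizer.check_config()

# Stream converted tensors (FP8Quantizer ignores the convert_info_list argument).
for name, tensor in quantizer.convert(model, quantizer.get_convert_info_list()):
    ...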
diff --git a/friendli/modules/quantizer/models/arctic.py b/friendli/modules/quantizer/models/arctic.py
deleted file mode 100644
index cc7d3fd9..00000000
--- a/friendli/modules/quantizer/models/arctic.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli ArcticForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type
-
-import torch
-
-from friendli.modules.quantizer.base import FP8QuantHook
-from friendli.modules.quantizer.schema.data import (
- HFQuantInput,
- HFTFQuantInputs,
- TFQuantInputs,
-)
-
-
-class ArcticHook(FP8QuantHook):
- """FP8QuantHook for ArcticForCausalLM."""
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in ArcticForCausalLM."""
- return model.model.layers
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in ArcticForCausalLM."""
- return (torch.nn.Linear,)
-
- def iter_tf_quant_inputs(
- self, model: torch.nn.Module
- ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of ArcticForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.self_attn
- block_sparse_moe = decoder_layer.block_sparse_moe
- mlp = decoder_layer.residual_mlp
- moe_ff1_ff_gate_target_names = []
- for expert_idx in range(self.converter.num_experts):
- moe_ff1_ff_gate_target_names.extend(
- [
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w1",
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w3",
- ]
- )
-
- yield HFTFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj",
- f"{self.quantized_layer_prefix}{index}.self_attn.k_proj",
- f"{self.quantized_layer_prefix}{index}.self_attn.v_proj",
- ],
- local_names=["q_proj", "k_proj", "v_proj"],
- ),
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- ],
- local_names=[
- "o_proj",
- ],
- ),
- # router
- HFQuantInput(
- parent_module=block_sparse_moe,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.gate",
- ],
- local_names=["gate"],
- ),
- # ff1, ff_gate in each moe
- HFQuantInput(
- parent_module=block_sparse_moe.experts,
- target_names=moe_ff1_ff_gate_target_names,
- local_names=["w1", "w3"],
- ),
- # ff2 in each moe
- HFQuantInput(
- parent_module=block_sparse_moe.experts,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w2"
- for expert_idx in range(self.converter.num_experts)
- ],
- local_names=["w2"],
- ),
- # ff1, ff_gate in parallel mlp
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.residual_mlp.w1",
- f"{self.quantized_layer_prefix}{index}.residual_mlp.w3",
- ],
- local_names=["w1", "w3"],
- ),
- # ff2 in parallel mlp
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.residual_mlp.w2"
- ],
- local_names=["w2"],
- ),
- ],
- )
diff --git a/friendli/modules/quantizer/models/dbrx.py b/friendli/modules/quantizer/models/dbrx.py
deleted file mode 100644
index f4e3232a..00000000
--- a/friendli/modules/quantizer/models/dbrx.py
+++ /dev/null
@@ -1,234 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli DbrxForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Dict, Iterator, List, Tuple, Type, cast
-
-import torch
-from torch.nn.modules import Module
-from tqdm import tqdm
-from transformers.models.dbrx.modeling_dbrx import DbrxBlock, DbrxConfig, DbrxExpertGLU
-
-from friendli.modules.quantizer.base import FP8QuantHook
-from friendli.modules.quantizer.schema.data import (
- HFQuantInput,
- HFTFQuantInputs,
- TFQuantInputs,
-)
-
-
-class DbrxLinearLayer(torch.nn.Module):
- """Custom FF2Proj layer for DbrxForCausalLM."""
-
- def __init__(self, weight: torch.nn.Parameter):
- """Initialize the DbrxLinearLayer."""
- super().__init__()
- self.weight = weight
-
- def forward(self, x: torch.Tensor, chunked_weight: torch.Tensor) -> torch.Tensor:
- """Forward pass for the DbrxLinearLayer."""
- return x.matmul(chunked_weight)
-
-
-class CustomDbrxExpertGLU(DbrxExpertGLU):
- """Custom DbrxExpertGLU layer for DbrxForCausalLM.
-
- This layer is used to replace the DbrxExpertGLU layer in DbrxForCausalLM.
-    To collect the inputs of the ff2 layer in each expert, we need to override the forward method.
- """
-
- def __init__(self, layer: DbrxExpertGLU, ffn_act_fn: Dict):
- """Initialize the CustomDbrxExpertGLU."""
- super().__init__(
- layer.hidden_size, layer.ffn_hidden_size, layer.moe_num_experts, ffn_act_fn
- )
-
- self.v1_linear = DbrxLinearLayer(layer.v1.detach())
- self.w1_linear = DbrxLinearLayer(layer.w1.detach())
- self.w2_linear = DbrxLinearLayer(layer.w2.detach())
-
- def forward(
- self,
- x: torch.Tensor,
- expert_w1: torch.Tensor,
- expert_v1: torch.Tensor,
- expert_w2: torch.Tensor,
- ) -> torch.Tensor:
- """Forward pass for the CustomDbrxExpertGLU."""
- gate_proj = self.w1_linear(x, expert_w1.t())
- up_proj = self.v1_linear(x, expert_v1.t())
- gate_proj = self.activation_fn(gate_proj)
- intermediate_states = gate_proj * up_proj
- down_proj = self.w2_linear(intermediate_states, expert_w2)
- return down_proj
-
- @staticmethod
- def from_layer(layer: DbrxExpertGLU, config: DbrxConfig) -> CustomDbrxExpertGLU:
- """Creates a CustomDbrxExpertGLU layer from a DbrxExpertGLU layer."""
- custom_layer = CustomDbrxExpertGLU(layer, config.ffn_config.ffn_act_fn)
- custom_layer.v1 = layer.v1
- custom_layer.w1 = layer.w1
- custom_layer.w2 = layer.w2
- return custom_layer
-
-
-class DbrxHook(FP8QuantHook):
- """FP8QuantHook for DbrxForCausalLM."""
-
- def get_quantized_param_names(self, model: torch.nn.Module) -> List[str]:
- """Return the parameter names of quantized layers."""
- quantized_param_names = []
- for index in range(
- len(self.get_tf_blocks(model)) # type: ignore[union-attr, arg-type]
- ):
- quantized_param_names.extend(
- [
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv.weight",
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj.weight",
- f"{self.quantized_layer_prefix}{index}.ffn.router.layer.weight",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2",
- ]
- )
- return quantized_param_names
-
- def get_quantized_param_scale_names(self, model: torch.nn.Module) -> List[str]:
- """Return the parameter scale names of quantized layers."""
- quantized_param_scale_names = []
- for index in range(
- len(self.get_tf_blocks(model)) # type: ignore[union-attr, arg-type]
- ):
- quantized_param_scale_names.extend(
- [
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv.weight_scale",
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj.weight_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.router.layer.weight_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1_weight_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1_weight_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2_weight_scale",
- ]
- )
- quantized_param_scale_names.extend(
- [
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv.in_scale",
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj.in_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.router.layer.in_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1_in_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1_in_scale",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2_in_scale",
- ]
- )
- return quantized_param_scale_names
-
- def pre_quantize(self, model: Module) -> torch.nn.Module:
- """Pre-quantization hook for DbrxForCausalLM."""
- for decoder_layer in tqdm(
- self.get_tf_blocks(model),
- desc="Pre-quantizing DbrxForCausalLM",
- unit="layer",
- ):
- cast(
- DbrxBlock, decoder_layer
- ).ffn.experts.mlp = CustomDbrxExpertGLU.from_layer(
- cast(DbrxBlock, decoder_layer).ffn.experts.mlp, self.converter.config
- )
- return model
-
- def post_quantize(self, model: Module) -> torch.nn.Module:
- """Post-quantization hook for DbrxForCausalLM."""
- for decoder_layer in tqdm(
- self.get_tf_blocks(model),
- desc="Post-quantizing DbrxForCausalLM",
- unit="layer",
- ):
- mlp = cast(DbrxBlock, decoder_layer).ffn.experts.mlp
-
- # ff1
- setattr(mlp, "v1_in_scale", mlp.v1_linear.in_scale)
- setattr(mlp, "v1_weight_scale", mlp.v1_linear.weight_scale)
- mlp.v1 = mlp.v1_linear.weight
- del mlp.v1_linear
-
- # ff_gate
- setattr(mlp, "w1_in_scale", mlp.w1_linear.in_scale)
- setattr(mlp, "w1_weight_scale", mlp.w1_linear.weight_scale)
- mlp.w1 = mlp.w1_linear.weight
- del mlp.w1_linear
-
- # ff2
- setattr(mlp, "w2_in_scale", mlp.w2_linear.in_scale)
- setattr(mlp, "w2_weight_scale", mlp.w2_linear.weight_scale)
- mlp.w2 = mlp.w2_linear.weight
- del mlp.w2_linear
- return model
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in DbrxForCausalLM."""
- return model.transformer.blocks
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in DbrxForCausalLM."""
- return (
- torch.nn.Linear,
- DbrxLinearLayer,
- )
-
- def iter_tf_quant_inputs(
- self, model: torch.nn.Module
- ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of DbrxForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = cast(DbrxBlock, decoder_layer).norm_attn_norm.attn
- mlp = cast(DbrxBlock, decoder_layer).ffn.experts.mlp
-
- yield HFTFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.Wqkv",
- ],
- local_names=["Wqkv"],
- ),
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.norm_attn_norm.attn.out_proj",
- ],
- local_names=[
- "out_proj",
- ],
- ),
- HFQuantInput(
- parent_module=cast(DbrxBlock, decoder_layer).ffn.router,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.ffn.router.layer",
- ],
- local_names=["layer"],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w1_linear",
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.v1_linear",
- ],
- local_names=["w1_linear", "v1_linear"],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.ffn.experts.mlp.w2_linear"
- ],
- local_names=["w2_linear"],
- ),
- ],
- )
diff --git a/friendli/modules/quantizer/models/llama.py b/friendli/modules/quantizer/models/llama.py
deleted file mode 100644
index d4002955..00000000
--- a/friendli/modules/quantizer/models/llama.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli LlamaForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Iterator, List, Tuple, Type
-
-import torch
-
-from friendli.modules.quantizer.base import FP8QuantHook
-from friendli.modules.quantizer.schema.data import (
- HFQuantInput,
- HFTFQuantInputs,
- TFQuantInputs,
-)
-
-
-class LlamaHook(FP8QuantHook):
- """FP8QuantHook for LlamaForCausalLM."""
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in LlamaForCausalLM."""
- return model.model.layers
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in LlamaForCausalLM."""
- return (torch.nn.Linear,)
-
- def iter_tf_quant_inputs(
- self, model: torch.nn.Module
- ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of LlamaForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.self_attn
- mlp = decoder_layer.mlp
-
- yield HFTFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj",
- f"{self.quantized_layer_prefix}{index}.self_attn.k_proj",
- f"{self.quantized_layer_prefix}{index}.self_attn.v_proj",
- ],
- local_names=["q_proj", "k_proj", "v_proj"],
- ),
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- ],
- local_names=[
- "o_proj",
- ],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.up_proj",
- f"{self.quantized_layer_prefix}{index}.mlp.gate_proj",
- ],
- local_names=["up_proj", "gate_proj"],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.down_proj"
- ],
- local_names=["down_proj"],
- ),
- ],
- )
diff --git a/friendli/modules/quantizer/models/mixtral.py b/friendli/modules/quantizer/models/mixtral.py
deleted file mode 100644
index 70abc34b..00000000
--- a/friendli/modules/quantizer/models/mixtral.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli MixtralForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List
-
-import torch
-
-from friendli.modules.quantizer.models.llama import LlamaHook
-from friendli.modules.quantizer.schema.data import (
- HFQuantInput,
- HFTFQuantInputs,
- TFQuantInputs,
-)
-
-
-class MixtralHook(LlamaHook):
- """FP8QuantHook for MixtralForCausalLM."""
-
- def iter_tf_quant_inputs(
- self, model: torch.nn.Module
- ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of MixtralForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.self_attn
- block_sparse_moe = decoder_layer.block_sparse_moe
- moe_ff1_ff_gate_target_names = []
- for expert_idx in range(self.converter.num_experts):
- moe_ff1_ff_gate_target_names.extend(
- [
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w1",
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w3",
- ]
- )
-
- yield HFTFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj",
- f"{self.quantized_layer_prefix}{index}.self_attn.k_proj",
- f"{self.quantized_layer_prefix}{index}.self_attn.v_proj",
- ],
- local_names=["q_proj", "k_proj", "v_proj"],
- ),
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- ],
- local_names=[
- "o_proj",
- ],
- ),
- # router
- HFQuantInput(
- parent_module=block_sparse_moe,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.gate",
- ],
- local_names=["gate"],
- ),
- # ff1, ff_gate in each moe
- HFQuantInput(
- parent_module=block_sparse_moe.experts,
- target_names=moe_ff1_ff_gate_target_names,
- local_names=["w1", "w3"],
- ),
- # ff2 in each moe
- HFQuantInput(
- parent_module=block_sparse_moe.experts,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.block_sparse_moe.experts.{expert_idx}.w2"
- for expert_idx in range(self.converter.num_experts)
- ],
- local_names=["w2"],
- ),
- ],
- )
diff --git a/friendli/modules/quantizer/models/mpt.py b/friendli/modules/quantizer/models/mpt.py
deleted file mode 100644
index 39a17ff1..00000000
--- a/friendli/modules/quantizer/models/mpt.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli MPTForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type
-
-import torch
-
-from friendli.modules.quantizer.base import FP8QuantHook
-from friendli.modules.quantizer.schema.data import (
- HFQuantInput,
- HFTFQuantInputs,
- TFQuantInputs,
-)
-
-
-class MPTHook(FP8QuantHook):
- """FP8QuantHook for MPTForCausalLM."""
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in MPTForCausalLM."""
- return model.transformer.blocks
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in MPTForCausalLM."""
- return (torch.nn.Linear,)
-
- def iter_tf_quant_inputs(
- self, model: torch.nn.Module
- ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of MPTForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.attn
- mlp = decoder_layer.ffn
-
- yield HFTFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- ],
- local_names=["Wqkv"],
- ),
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- ],
- local_names=[
- "out_proj",
- ],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.ffn.up_proj",
- ],
- local_names=["up_proj"],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.ffn.down_proj"
- ],
- local_names=["down_proj"],
- ),
- ],
- )
diff --git a/friendli/modules/quantizer/models/phi3.py b/friendli/modules/quantizer/models/phi3.py
deleted file mode 100644
index 4d4d15cb..00000000
--- a/friendli/modules/quantizer/models/phi3.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Phi3ForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type
-
-import torch
-
-from friendli.modules.quantizer.base import FP8QuantHook
-from friendli.modules.quantizer.schema.data import (
- HFQuantInput,
- HFTFQuantInputs,
- TFQuantInputs,
-)
-
-
-class Phi3Hook(FP8QuantHook):
- """FP8QuantHook for Phi3ForCausalLM."""
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the transformer blocks in Phi3ForCausalLM."""
- return model.model.layers
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in Phi3ForCausalLM."""
- return (torch.nn.Linear,)
-
- def iter_tf_quant_inputs(
- self, model: torch.nn.Module
- ) -> Iterator[TFQuantInputs] | Iterator[HFTFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of Phi3ForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.self_attn
- mlp = decoder_layer.mlp
-
- yield HFTFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.qkv_proj",
- ],
- local_names=["qkv_proj"],
- ),
- HFQuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- ],
- local_names=[
- "o_proj",
- ],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.gate_up_proj",
- ],
- local_names=["gate_up_proj"],
- ),
- HFQuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.down_proj"
- ],
- local_names=["down_proj"],
- ),
- ],
- )
diff --git a/friendli/modules/quantizer/schema/__init__.py b/friendli/modules/quantizer/schema/__init__.py
deleted file mode 100644
index f5d8dd04..00000000
--- a/friendli/modules/quantizer/schema/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer Schema."""
diff --git a/friendli/modules/quantizer/schema/config.py b/friendli/modules/quantizer/schema/config.py
deleted file mode 100644
index 2ca36f7b..00000000
--- a/friendli/modules/quantizer/schema/config.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer Config Schema."""
-
-from __future__ import annotations
-
-from typing import Literal, Union
-
-from pydantic import BaseModel, Field
-from typing_extensions import Annotated
-
-from friendli.enums import ModelDataType, QuantDatasetFormat, QuantMode
-
-
-class CalibrationDatasetConfig(BaseModel):
- """Calibration dataset config."""
-
- path_or_name: str = "cnn_dailymail:3.0.0"
- format: QuantDatasetFormat = QuantDatasetFormat.JSON
- split: str = "validation"
- lookup_column_name: str = "article"
- num_samples: int = 512
- max_length: int = 512
-
-
-class AbstractQuantConfig(BaseModel):
- """Abstract quantization config."""
-
- mode: QuantMode
- device: str = "cuda:0"
- offload: bool = True
- seed: int = 42
- percentile: float = 100.0
- quant_dtype: ModelDataType = ModelDataType.INT8
- calibration_dataset: CalibrationDatasetConfig = Field(
- default_factory=CalibrationDatasetConfig
- )
-
-
-class FP8QuantConfig(AbstractQuantConfig):
- """FP8 quantization config.
-
-    The data type of the parameters is converted to the one specified in `quant_dtype`
-    using a calibration dataset. The quantization scales for weights and activations are
-    added to the converted checkpoint.
-
- """
-
- mode: Literal[QuantMode.FP8] = QuantMode.FP8
-
-
-class SmoothQuantArgs(BaseModel):
- """SmoothQuant args."""
-
- migration_strength: float = 0.5
- attn_fc_smoothing: bool = False
- ff2_smoothing: bool = False
-
-
-class SmoothQuantConfig(AbstractQuantConfig):
- """SmoothQuant config."""
-
- mode: Literal[QuantMode.SMOOTH_QUANT] = QuantMode.SMOOTH_QUANT
- smoothquant_args: SmoothQuantArgs = Field(default_factory=SmoothQuantArgs)
-
-
-class AWQArgs(BaseModel):
- """AWQ args."""
-
- quant_dtype: ModelDataType = ModelDataType.INT4
- quant_bit: int = 4
- quant_group_size: int = 64
-
-
-class AWQConfig(AbstractQuantConfig):
- """AWQ config."""
-
- mode: Literal[QuantMode.AWQ] = QuantMode.AWQ
- awq_args: AWQArgs = Field(default_factory=AWQArgs)
-
-
-OneOfQuantConfig = Annotated[
- Union[SmoothQuantConfig, AWQConfig, FP8QuantConfig], Field(discriminator="mode")
-]
-
-
-class QuantConfig(BaseModel):
- """Quantization config."""
-
- config: OneOfQuantConfig
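Because `OneOfQuantConfig` is a discriminated union on `mode`, a plain dict (e.g. loaded from YAML) resolves to the matching config subclass during validation. A sketch under that assumption (pydantic v1-style behavior; the repo's compat helpers may differ):

from friendli.enums import QuantMode

cfg = QuantConfig(
    config={
        "mode": QuantMode.FP8,
        "device": "cuda:0",
        "percentile": 99.9,
        "calibration_dataset": {"num_samples": 128},
    }
)
assert isinstance(cfg.config, FP8QuantConfig)
print(cfg.config.calibration_dataset.num_samples)  # 128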
diff --git a/friendli/modules/quantizer/schema/data.py b/friendli/modules/quantizer/schema/data.py
deleted file mode 100644
index ae472126..00000000
--- a/friendli/modules/quantizer/schema/data.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer Data Schema."""
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Callable, List, Optional
-
-import torch
-
-from friendli.enums import ModelDataType
-
-ModuleName = str
-
-
-@dataclass
-class CommonQuantResult:
- """Dataclass for quantization result per layer."""
-
- module_name: str
- quant_dtype: ModelDataType
- q_group_size: int
- zero_point: torch.Tensor
-
-
-@dataclass
-class WeightOnlyQuantResult(CommonQuantResult):
- """Dataclass for weight-only quantization result per layer."""
-
- weight_scale: torch.Tensor
- q_weight: torch.Tensor
-
-
-@dataclass
-class WeightActQuantResult(WeightOnlyQuantResult):
- """Dataclass for weight-activation quantization result per layer."""
-
- act_scale: torch.Tensor
- zero_point: torch.Tensor
- q_group_size: int
-
-
-@dataclass
-class QuantInput:
- """Dataclass for int8 quantization input of each layer in transformer block."""
-
- weight: torch.Tensor # [OutDim, InDim]
- name: ModuleName
- start_offset: Optional[int] # start offset of the weight tensor along the out_dim
- end_offset: Optional[int] # end offset of the weight tensor along the out_dim
- sort_fn: Optional[
- Callable[[torch.Tensor], torch.Tensor]
- ] = None # sort function for max_output_stats
-
-
-@dataclass
-class HFQuantInput:
- """Dataclass for quantization input of each layer in transformer block.
-
- Attributes:
-        parent_module: module that contains the target layers.
-        target_names: list of the target modules' full names
-            (e.g. model.model.layers.0.self_attn.q_proj).
-        local_names: list of the target modules' names used when accessed from parent_module
-            (e.g. q_proj, k_proj, v_proj).
- """
-
- parent_module: torch.nn.Module
- target_names: List[ModuleName]
-    local_names: List[str]
-
-
-@dataclass
-class HFTFQuantInputs:
- """Dataclass for quantization input per transformer block."""
-
- layer_index: int
- block: torch.nn.Module
- quant_inputs: List[HFQuantInput]
-
-
-@dataclass
-class TFQuantInputs: # pylint: disable=too-many-instance-attributes
- """Dataclass for int8 quantization input per transformer block."""
-
- layer_index: int
- block: torch.nn.Module
- q: QuantInput
- k: QuantInput
- v: QuantInput
- attn_fc: QuantInput
- ff1: QuantInput
- ff2: QuantInput
-
-
-@dataclass
-class TFQuantResults: # pylint: disable=too-many-instance-attributes
- """Dataclass for int8 quantization result per a transformer block."""
-
- layer_prefix_with_index: str
- block: torch.nn.Module
- q: CommonQuantResult
- k: CommonQuantResult
- v: CommonQuantResult
- attn_fc: CommonQuantResult
- ff1: CommonQuantResult
- ff2: CommonQuantResult
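As an illustration of how `HFQuantInput` is used elsewhere in this change, the q/k/v projections of one decoder layer are grouped into a single input so they are quantized with a shared scale. The `model` object and the exact module prefix below are hypothetical and follow the Llama layout.

self_attn = model.model.layers[0].self_attn  # hypothetical LlamaForCausalLM

qkv_input = HFQuantInput(
    parent_module=self_attn,
    target_names=[
        "model.layers.0.self_attn.q_proj",
        "model.layers.0.self_attn.k_proj",
        "model.layers.0.self_attn.v_proj",
    ],
    local_names=["q_proj", "k_proj", "v_proj"],
)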
diff --git a/friendli/modules/quantizer/smoothquant/__init__.py b/friendli/modules/quantizer/smoothquant/__init__.py
deleted file mode 100644
index 5205fe18..00000000
--- a/friendli/modules/quantizer/smoothquant/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model SmoothQuant Quantizer."""
diff --git a/friendli/modules/quantizer/smoothquant/base.py b/friendli/modules/quantizer/smoothquant/base.py
deleted file mode 100644
index 8ee4e1a7..00000000
--- a/friendli/modules/quantizer/smoothquant/base.py
+++ /dev/null
@@ -1,567 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli SmoothQuant Quantizer Base."""
-
-from __future__ import annotations
-
-from abc import abstractmethod
-from dataclasses import fields
-from typing import Any, Dict, Iterator, List, Tuple, cast
-
-import datasets # type: ignore[import]
-import torch
-
-from friendli.enums import ModelDataType
-from friendli.errors import NotSupportedQuantConfigError
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.interface import ModelConversionInterface
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.converter.utils import get_tokenizer
-from friendli.modules.quantizer.base import AbstractQuantHook, CommonQuantizer
-from friendli.modules.quantizer.layers import WeightActQuantizedLinearLayer
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import (
- ModuleName,
- QuantInput,
- TFQuantInputs,
- TFQuantResults,
- WeightActQuantResult,
-)
-from friendli.modules.quantizer.utils import (
- collect_stats,
- get_weight_act_quant_scales,
- quantized_linear_weight_reshape,
- quantized_qkv_weight_reshape,
- safe_load_datasets,
- scale_reshape,
-)
-
-
-class PreSmoother(torch.nn.Module):
- """Module for containing smoothing scale.
-
-    This module contains the smoothing scale for quantization.
-    If the matmul layer has a previous layer, the smoothing scale can be migrated
-    to that layer. But if the matmul layer is the first layer, the scale needs
-    to be stored in this module. In particular, when an MLP ff2 layer is preceded by an
-    activation layer that prevents migrating the scale to the previous layer, this module
-    is used to store the smoothing scale. [SmoothQuant Issue #15]
-    (https://github.com/mit-han-lab/smoothquant/issues/15#issuecomment-1353390283).
-
- Args:
-        in_dim (int): input dimension of the matmul layer's weight.
- """
-
- def __init__(self, in_dim: int):
- """Initialize PreSmoother."""
- super().__init__()
- self.scale = torch.nn.Parameter(torch.ones(in_dim, dtype=torch.float32))
-
- def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Forward function of PreSmoother."""
- return (x * self.scale).to(x.dtype)
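For context, the smoothing scale that a `PreSmoother` (or the previous layer) absorbs typically comes from the standard SmoothQuant migration formula, s_j = max|X_j|^alpha / max|W_j|^(1 - alpha) per input channel. The sketch below shows that general formula; it is background on the technique rather than code from this module.

import torch

def smoothing_scale(act_max: torch.Tensor, weight: torch.Tensor, alpha: float = 0.5) -> torch.Tensor:
    """Per-input-channel SmoothQuant smoothing scale (illustrative)."""
    weight_max = weight.abs().max(dim=0).values.clamp(min=1e-5)  # weight: [out, in]
    return (act_max.pow(alpha) / weight_max.pow(1.0 - alpha)).clamp(min=1e-5)

# Activations are divided by the scale (by the previous layer or a PreSmoother), and
# the following linear layer's weight columns are multiplied by it to compensate.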
-
-
-class SmoothQuantHook(AbstractQuantHook):
- """Quantization Hook for SmoothQuant."""
-
- @abstractmethod
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the attention fc layer in the decoder block."""
-
- @abstractmethod
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the second feed-forward layer in the decoder block."""
-
- @abstractmethod
- def iter_smooth_norm_weights(
- self, model: torch.nn.Module
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm and linear layer's weight per transformer block."""
-
- def _register_pre_smoother(self, linear: torch.nn.Linear) -> PreSmoother:
- """Register pre_smoother storing smoothing scale of linear layer."""
- pre_smoother = PreSmoother(linear.in_features).to(device=linear.weight.device)
-
- def pre_smoother_hook(_, x: Tuple[Any, ...]) -> Tuple[torch.Tensor, ...]:
- return (pre_smoother.forward(x[0]),)
-
- linear.register_forward_pre_hook(pre_smoother_hook)
- return pre_smoother
-
- def pre_smooth(
- self,
- model: torch.nn.Module,
- ) -> torch.nn.Module:
- """Pre-procedure for SmoothQuant before Smoothing."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- for decoder_layer in self.get_tf_blocks(model):
- if quant_args.attn_fc_smoothing:
- attn_fc_pre_smoother = self._register_pre_smoother(
- self.get_attn_fc_layer(decoder_layer)
- )
- decoder_layer.add_module("attn_fc_pre_smoother", attn_fc_pre_smoother)
- if quant_args.ff2_smoothing:
- ff2_pre_smoother = self._register_pre_smoother(
- self.get_ff2_layer(decoder_layer)
- )
- decoder_layer.add_module("ff2_pre_smoother", ff2_pre_smoother)
- return model
-
- def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor:
- """Sort max_output_stas for seperating qkv_layer's output_stats."""
- return max_output_stat
-
- def copy_norms(self, model: torch.nn.Module) -> torch.nn.Module:
- """Copy and Register norms in transformer block for seperated scaling.
-
- In some models(e.g. llama, gptj, codegen), matmul layers share activations
- from the same norms. Therefore, we need to copy and register the norms for
- seperated smoothing scale. For example, in llama, normalization layer is
- shared with gate linear layer and attention linear layer. Thus, we need to
- copy and register the norms for each linear layer and use them for smoothing.
- """
- return model
-
- def get_quant_result(
- self,
- quant_inputs: TFQuantInputs,
- **kwargs: Any,
- ) -> TFQuantResults:
- """Returns the quantization result of the quantized layer.
-
-        If the model has additional quantized layers, this should be extended in the subclass.
-
- """
- max_input_stats: Dict[ModuleName, torch.Tensor] = kwargs["max_input_stats"]
- max_output_stats: Dict[ModuleName, torch.Tensor] = kwargs["max_output_stats"]
-
- def get_scale(
- quant_input: QuantInput,
- ) -> WeightActQuantResult:
- weight, name, start, end, sort_fn = (
- quant_input.weight,
- quant_input.name,
- quant_input.start_offset,
- quant_input.end_offset,
- quant_input.sort_fn,
- )
-
- return get_weight_act_quant_scales(
- name,
- max_input_stats[name],
- weight[start:end],
- weight[start:end],
- sort_fn(max_output_stats[name])[start:end]
- if sort_fn
- else max_output_stats[name][start:end],
- )
-
- return TFQuantResults(
- layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_inputs.layer_index}.",
- block=quant_inputs.block,
- q=get_scale(quant_inputs.q),
- k=get_scale(quant_inputs.k),
- v=get_scale(quant_inputs.v),
- attn_fc=get_scale(quant_inputs.attn_fc),
- ff1=get_scale(quant_inputs.ff1),
- ff2=get_scale(quant_inputs.ff2),
- )
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for modified modules.
-
- This convert_info_list is used for modules that are modified for quantization.
- Especially, for attention fc layer and MLP ff2 layer, we need to migrate
- smooth scale to the previous layer. Thus, we add the smoothing scaler, and
- modify the convert_info_list for the modified modules.
-
- In some models, matmul layers share activations from the same norms. Therefore,
- we use `copy_norms()` to copy and register the norms for seperated smoothing scale.
- Thus, we modify the convert_info_list for the modified modules.
- """
- sq_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- new_layer_convert_info_list = []
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
-
- if sq_args.attn_fc_smoothing:
- new_layer_convert_info_list.append(
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc_pre_smoother.scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_proj/smoothquant/smoothing_vector:0", # pylint: disable=line-too-long
- reshape_fn=scale_reshape,
- )
- )
- if sq_args.ff2_smoothing:
- new_layer_convert_info_list.append(
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2_pre_smoother.scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/smoothing_vector:0", # pylint: disable=line-too-long
- reshape_fn=scale_reshape,
- )
- )
-
- return new_layer_convert_info_list
-
- @property
- def quantized_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
- """Return the list of conversion informations for quantized layers."""
- convert_info_list = []
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}q.weight_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/q_weight_scale:0", # pylint: disable=line-too-long
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}k.weight_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/k_weight_scale:0", # pylint: disable=line-too-long
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}v.weight_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/v_weight_scale:0", # pylint: disable=line-too-long
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}q.out_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/q_out_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}k.out_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/k_out_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}v.out_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/v_out_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}q.in_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/in_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc.weight_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_proj/smoothquant/weight_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc.out_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_proj/smoothquant/out_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc.in_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}attn/c_proj/smoothquant/in_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff1.weight_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/weight_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff1.out_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/out_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff1.in_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/in_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2.weight_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/weight_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2.out_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/out_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2.in_scale"],
- data_type=ModelDataType.FP32,
- converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/in_scale:0",
- reshape_fn=scale_reshape,
- ),
- ConvertInfo(
- param_names=[
- f"{layer_prefix}q.weight",
- f"{layer_prefix}k.weight",
- f"{layer_prefix}v.weight",
- ],
- data_type=ModelDataType.INT8,
- converted_name=f"{converted_prefix}attn/c_attn/smoothquant/weight:0",
- reshape_fn=quantized_qkv_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}attn_fc.weight"],
- data_type=ModelDataType.INT8,
- converted_name=f"{converted_prefix}attn/c_proj/smoothquant/weight:0",
- reshape_fn=quantized_linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff1.weight"],
- data_type=ModelDataType.INT8,
- converted_name=f"{converted_prefix}mlp/c_fc/smoothquant/weight:0",
- reshape_fn=quantized_linear_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ff2.weight"],
- data_type=ModelDataType.INT8,
- converted_name=f"{converted_prefix}mlp/c_proj/smoothquant/weight:0",
- reshape_fn=quantized_linear_weight_reshape,
- ),
- ]
- )
- return convert_info_list
-
-
-class SmoothQuantQuantizer(CommonQuantizer, ModelConversionInterface):
- """Quantizer for SmoothQuant."""
-
- def check_config(self) -> None:
- """Check if the SmoothQuant quantization config is valid."""
- quant_config = cast(SmoothQuantConfig, self.quant_config)
- smoothquant_args = quant_config.smoothquant_args
- super().check_config()
-        if not 0 <= smoothquant_args.migration_strength <= 1:
- raise NotSupportedQuantConfigError(
- invalid_option=str(smoothquant_args.migration_strength),
- valid_options=["between 0 and 1."],
- )
-
- def get_calib_dataset(self) -> datasets.Dataset:
- """Get calibration dataset for SmoothQuant."""
- data_cfg = self.quant_config.calibration_dataset
- tokenizer = get_tokenizer(self.converter.config.name_or_path)
- dataset = safe_load_datasets(data_cfg)
-
- def preprocess(example) -> Dict[str, torch.Tensor]:
- truncate_length = data_cfg.max_length * 4
- while True:
- input_ids = tokenizer(
- example[data_cfg.lookup_column_name][:truncate_length],
- return_tensors="pt",
- max_length=data_cfg.max_length * 2,
- truncation=True,
- padding=False,
- ).input_ids
-
- if input_ids.size(
- 1
- ) >= data_cfg.max_length * 2 or truncate_length >= len(
- example[data_cfg.lookup_column_name]
- ):
- input_ids = input_ids[:, : data_cfg.max_length]
- break
-
- truncate_length *= 2
- return {"input_ids": input_ids}
-
- dataset = (
- dataset.shuffle(self.quant_config.seed)
- .select(range(data_cfg.num_samples))
- .select_columns([data_cfg.lookup_column_name])
- .map(function=preprocess)
- )
-
- return dataset
-
- @torch.no_grad()
- def _perform_smoothing(
- self,
- activation_norms: List[torch.Tensor],
- fc_weights: List[torch.Tensor],
- activation_max: torch.Tensor,
- *,
- migration_strength: float = 0.5,
- epsilon: float = 1e-5,
- inplace: bool = False,
- ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
- """Perform activation-weight smoothing in SmoothQuant.
-
- Performs the activation-weight smoothing scheme described in SmoothQuant
- (Xiao et al., 2023), which migrates the amplitude of outliers from activations
- to weights of matmul layers. The function takes in the following parameters:
-
- Args:
- activation_norms: torch.Tensors representing affine parameters
- (i.e., beta and gamma) of a normalization layer before each matmul layer.
- fc_weights: torch.Tensors representing the weight matrices of the matmul layer.
- activation_max: The maximum activation value of inputs of the matmul layer.
- migration_strength: the strength of the activation migration. Default is 0.5.
- epsilon: The epsilon used for numerical stability when calculating the scales.
- Default is 1e-5.
-
- Returns:
-            A tuple of two lists of torch.Tensors: (smoothed_activation_norms, smoothed_fc_weights)
-
- The function calculates "scales" as `pow(|Activation|, migration_strength) /
-        pow(|Weight|, 1-migration_strength)` and folds the smoothing effect into the
-        normalization layer that precedes each matmul layer. Fusing the scales into the
-        existing normalization layer avoids inserting a separate smoothing layer before
-        every matmul layer, which keeps the implementation fast and memory-efficient.
-
- The function returns the smoothed normalization coefficients and the smoothed weight
- matrices after the smoothing process.
- """
- # shape of activation norms: [InChannels]
- # shape of fc weights: [OutChannels, InChannels]
- # shape of activation_max: [InChannels]
-
- # pylint: disable=too-many-locals
- assert activation_norms
- assert fc_weights
-
- assert activation_norms[0].ndim == 1
- in_channels = activation_norms[0].size(0)
- device = activation_norms[0].device
- dtype = activation_norms[0].dtype
-
- for norm in activation_norms:
- assert tuple(norm.size()) == (in_channels,)
- assert norm.device == device
- assert norm.dtype == dtype
-
- for weight in fc_weights:
- assert weight.ndim == 2
- assert weight.size(1) == in_channels
- assert weight.device == device
- assert weight.dtype == dtype
-
- activation_max = activation_max.to(device=device)
- weight_max = fc_weights[0].abs().max(dim=0).values
- for weight in fc_weights[1:]:
- weight_max = torch.maximum(weight_max, weight.abs().max(dim=0).values)
-
- assert tuple(activation_max.size()) == (in_channels,)
- assert tuple(weight_max.size()) == (in_channels,)
- alpha = migration_strength
- scales = (
- (
- activation_max.to(dtype=torch.float32).pow(alpha)
- / weight_max.to(dtype=torch.float32).pow(1 - alpha)
- )
- .clamp(min=epsilon)
- .to(dtype=dtype)
- )
-
- scaled_activation_norms = [act_norm / scales for act_norm in activation_norms]
- scaled_weights = [w * scales.view(1, -1) for w in fc_weights]
-
- if inplace:
- for dst, src in zip(activation_norms, scaled_activation_norms):
- dst.copy_(src)
- for dst, src in zip(fc_weights, scaled_weights):
- dst.copy_(src)
-
- return scaled_activation_norms, scaled_weights
-
- def _smooth(
- self,
- model: torch.nn.Module,
- ) -> None:
-        """Smooths the model before quantization."""
- model.to(device=torch.device(self.quant_config.device))
- model.eval()
- model = cast(SmoothQuantHook, self.hook).pre_smooth(model)
-
- # collect stats for SmoothQuant scale.
- dataset = self.get_calib_dataset()
- quant_config = cast(SmoothQuantConfig, self.quant_config)
- max_input_stats, _ = collect_stats(
- model,
- quant_config.device,
- dataset,
- cast(SmoothQuantHook, self.hook).get_linear_layer_types(),
- tqdm_desc="Collecting stats for Smoothing.",
- percentile=100.0,
- )
-
- # TODO change name to pre_act_params, post_act_params
- # (attn_fc, ff2 are not scaled with norms)
- for norms, weights, name in cast(
- SmoothQuantHook, self.hook
- ).iter_smooth_norm_weights(model):
- self._perform_smoothing(
- norms,
- weights,
- max_input_stats[name],
- migration_strength=quant_config.smoothquant_args.migration_strength,
- inplace=True,
- )
-
- def pre_quantize(
- self,
- model: torch.nn.Module,
- ) -> None:
- """Pre-procedure that should be called before quantize() is called."""
- self._smooth(model)
-
- def quantize(
- self,
- model: torch.nn.Module,
- ) -> torch.nn.Module:
- """Quantize model with SmoothQuant."""
- dataset = self.get_calib_dataset()
- max_input_stats, max_output_stats = collect_stats(
- model,
- self.quant_config.device,
- dataset,
- cast(SmoothQuantHook, self.hook).get_linear_layer_types(),
- percentile=self.quant_config.percentile,
- tqdm_desc="Collecting stats for Static Quantization.",
- )
- for quant_input in self.hook.iter_tf_quant_inputs(model):
- assert isinstance(quant_input, TFQuantInputs)
- quant_result = cast(SmoothQuantHook, self.hook).get_quant_result(
- quant_input,
- max_input_stats=max_input_stats,
- max_output_stats=max_output_stats,
- )
-
- for field in fields(quant_result):
- layer_quant_result = getattr(quant_result, field.name)
- if isinstance(layer_quant_result, WeightActQuantResult):
- layer = model.get_submodule(layer_quant_result.module_name)
- q_layer = WeightActQuantizedLinearLayer.from_layer(
- layer, layer_quant_result
- )
- quant_result.block.add_module(field.name, q_layer)
-
- return model
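
To make the smoothing math above concrete, here is a minimal, self-contained sketch of the scale computation and the fold into a preceding normalization weight; the tensor shapes, the `smooth` helper, and the default `alpha` are illustrative assumptions.

```python
import torch


def smooth(norm_weight, fc_weight, act_max, alpha=0.5, eps=1e-5):
    """Fold SmoothQuant smoothing scales into a preceding norm weight (toy sketch)."""
    # norm_weight: [in_channels], fc_weight: [out_channels, in_channels], act_max: [in_channels]
    weight_max = fc_weight.abs().max(dim=0).values
    scales = (act_max.pow(alpha) / weight_max.pow(1 - alpha)).clamp(min=eps)
    return norm_weight / scales, fc_weight * scales.view(1, -1)


norm_w, fc_w, act_max = torch.ones(8), torch.randn(16, 8), torch.rand(8) * 10
new_norm_w, new_fc_w = smooth(norm_w, fc_w, act_max)
# The composed computation is unchanged: scaling the norm down and the weight up cancels out.
assert torch.allclose(new_norm_w * new_fc_w, norm_w * fc_w, atol=1e-5)
```
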
diff --git a/friendli/modules/quantizer/smoothquant/models/bloom.py b/friendli/modules/quantizer/smoothquant/models/bloom.py
deleted file mode 100644
index 86fc39de..00000000
--- a/friendli/modules/quantizer/smoothquant/models/bloom.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli BloomForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Any, Dict, Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.bloom import ( # type: ignore[import]
- BloomConfig,
- BloomForCausalLM,
-)
-
-from friendli.modules.converter.base import OneOfConverter
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantBloomHook(SmoothQuantHook):
- """SmoothQuant Hook for BloomForCausalLM."""
-
- def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter):
- """Initialize SmoothQuantBloomHook."""
- super().__init__(quant_config, converter)
- self.num_heads = cast(BloomConfig, converter.config).num_attention_heads
- self.hidden_size = cast(BloomConfig, converter.config).hidden_size
- self.head_size = self.hidden_size // self.num_heads
-
- def iter_smooth_norm_weights(
- self,
- model: BloomForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
-        """Returns iterator of layernorm's weight and linear layer's weight per transformer block in BloomForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.input_layernorm.weight.data,
- decoder_layer.input_layernorm.bias.data,
- ],
- [
- decoder_layer.self_attention.query_key_value.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical.
- )
- # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.post_attention_layernorm.weight.data,
- decoder_layer.post_attention_layernorm.bias.data,
- ],
- [decoder_layer.mlp.dense_h_to_4h.weight.data], # [OutDim, InDim]
- f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.self_attention.dense.weight.data],
- f"{self.quantized_layer_prefix}{index}.self_attention.dense",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.mlp.dense_4h_to_h.weight.data],
- f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h",
- )
-
- def reshape_qkv_weight(
- self, attn_layer: torch.nn.Module
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
- """Reshapes the qkv weight in BloomForCausalLM for Quantization."""
- qkv_layer = cast(torch.nn.Linear, attn_layer.query_key_value)
- split_qkv_weight_list = torch.split(qkv_layer.weight, self.head_size, dim=0)
- num_heads = cast(BloomConfig, self.converter.config).num_attention_heads
-
- [q_weight, k_weight, v_weight] = [
- torch.cat(
- [split_qkv_weight_list[j * 3 + i] for j in range(num_heads)],
- dim=0,
- ).reshape(-1, self.hidden_size)
- for i in range(3)
- ]
- return q_weight, k_weight, v_weight
-
- def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor:
-        """Sort max_output_stat for separating qkv_layer's output stats."""
- split_qkv_output_stat = torch.split(max_output_stat, self.head_size)
- qkv_output_stat_list = [
- torch.cat(
- [split_qkv_output_stat[j * 3 + i] for j in range(self.num_heads)],
- )
- for i in range(3)
- ]
- qkv_output_stat = torch.cat(qkv_output_stat_list)
- return qkv_output_stat
-
- def iter_tf_quant_inputs(self, model: BloomForCausalLM) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of BloomForCausalLM."""
- for index, decoder_layer in enumerate(model.transformer.h):
- self_attn = decoder_layer.self_attention
- q_weight, k_weight, v_weight = self.reshape_qkv_weight(self_attn)
- qkv_weight = torch.cat([q_weight, k_weight, v_weight], dim=0)
- qkv_weight_out_dim = qkv_weight.size(0)
- fc1 = decoder_layer.mlp.dense_h_to_4h
- fc2 = decoder_layer.mlp.dense_4h_to_h
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value",
- 0,
- qkv_weight_out_dim // 3,
- self.sort_qkv_output_stats,
- ),
- k=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value",
- qkv_weight_out_dim // 3,
- qkv_weight_out_dim // 3 * 2,
- self.sort_qkv_output_stats,
- ),
- v=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value",
- qkv_weight_out_dim // 3 * 2,
- qkv_weight_out_dim,
- self.sort_qkv_output_stats,
- ),
- attn_fc=QuantInput(
- self_attn.dense.weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.dense",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in BloomForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.self_attention.dense
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.dense_4h_to_h
-
- def get_tf_blocks(self, model: BloomForCausalLM) -> List[torch.nn.Module]:
-        """Returns the decoder layers (transformer blocks) in the model."""
- return list(model.transformer.h)
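
The Bloom hook above assumes the fused `query_key_value` weight interleaves Q, K, and V per attention head along the output dimension. A toy sketch of regrouping that layout into contiguous Q, K, and V blocks, mirroring `reshape_qkv_weight` (the sizes are illustrative assumptions):

```python
import torch

num_heads, head_size, hidden = 2, 3, 6
# Fused weight rows laid out per head as [q_h0, k_h0, v_h0, q_h1, k_h1, v_h1].
fused = torch.randn(num_heads * 3 * head_size, hidden)

chunks = torch.split(fused, head_size, dim=0)  # one [head_size, hidden] chunk per (head, q/k/v) slice
q, k, v = (
    torch.cat([chunks[h * 3 + i] for h in range(num_heads)], dim=0) for i in range(3)
)
print(q.shape, k.shape, v.shape)  # each is [num_heads * head_size, hidden]
```
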
diff --git a/friendli/modules/quantizer/smoothquant/models/codegen.py b/friendli/modules/quantizer/smoothquant/models/codegen.py
deleted file mode 100644
index 00455186..00000000
--- a/friendli/modules/quantizer/smoothquant/models/codegen.py
+++ /dev/null
@@ -1,205 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli CodeGenForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-import copy
-from typing import Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.codegen import CodeGenForCausalLM # type: ignore[import]
-
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantCodeGenHook(SmoothQuantHook):
- """SmoothQuant Hook for CodeGenForCausalLM."""
-
- def pre_smooth(self, model: torch.nn.Module) -> torch.nn.Module:
- """Pre-procedure for SmoothQuant in CodeGenForCausalLM that should be called before smooth() is called."""
- super().pre_smooth(model)
- for decoder_layer in cast(CodeGenForCausalLM, model).transformer.h:
- decoder_layer.add_module("ln_2", copy.deepcopy(decoder_layer.ln_1))
- return model
-
- def iter_smooth_norm_weights(
- self,
- model: CodeGenForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in CodeGenForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
-
- for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection, MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.ln_1.weight.data,
- decoder_layer.ln_1.bias.data,
- ],
- [
- decoder_layer.attn.qkv_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.attn.qkv_proj",
- )
- yield (
- [
- decoder_layer.ln_2.weight.data,
- decoder_layer.ln_2.bias.data,
- ],
- [
- decoder_layer.mlp.fc_in.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.mlp.fc_in",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.attn.out_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.mlp.fc_out.weight.data],
- f"{self.quantized_layer_prefix}{index}.mlp.fc_out",
- )
-
- def reshape_qkv_weight(
- self, attn_layer: torch.nn.Module
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
- """Reshapes the qkv weight in CodeGenForCausalLM for Quantization."""
- qkv_layer = cast(torch.nn.Linear, attn_layer.qkv_proj)
- original_qkv_weight = qkv_layer.weight
- reshaped_qkv_weight = original_qkv_weight.reshape(
- (4, original_qkv_weight.size(0) // 4, original_qkv_weight.size(1))
- )
- q_weight, v_weight, k_weight = torch.split(
- reshaped_qkv_weight, reshaped_qkv_weight.size(1) // 3, dim=1
- )
- q_weight = q_weight.reshape((-1, q_weight.size(2)))
- k_weight = k_weight.reshape((-1, k_weight.size(2)))
- v_weight = v_weight.reshape((-1, v_weight.size(2)))
-
- return q_weight, k_weight, v_weight
-
- def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor:
- """Sorts the max output stats of qkv_proj in CodeGenForCausalLM."""
-        reshaped_max_output_stat = max_output_stat.reshape(
-            (4, max_output_stat.size(0) // 4)
-        )
-        q_max_output_stat, v_max_output_stat, k_max_output_stat = torch.split(
-            reshaped_max_output_stat, reshaped_max_output_stat.size(1) // 3, dim=1
- )
- q_max_output_stat = q_max_output_stat.reshape((-1,))
- k_max_output_stat = k_max_output_stat.reshape((-1,))
- v_max_output_stat = v_max_output_stat.reshape((-1,))
- return torch.cat(
- (q_max_output_stat, k_max_output_stat, v_max_output_stat), dim=0
- )
-
- def iter_tf_quant_inputs(
- self, model: CodeGenForCausalLM
- ) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of CodeGenForCausalLM."""
- for index, decoder_layer in enumerate(model.transformer.h):
- self_attn = decoder_layer.attn
- q_weight, k_weight, v_weight = self.reshape_qkv_weight(self_attn)
- qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0)
- attn_weight_outdim = qkv_weight.size(0) # OutDim
- fc1 = decoder_layer.mlp.fc_in
- fc2 = decoder_layer.mlp.fc_out
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attn.qkv_proj",
- 0,
- attn_weight_outdim // 3,
- self.sort_qkv_output_stats,
- ),
- k=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attn.qkv_proj",
- attn_weight_outdim // 3,
- attn_weight_outdim // 3 * 2,
- self.sort_qkv_output_stats,
- ),
- v=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attn.qkv_proj",
- attn_weight_outdim // 3 * 2,
- attn_weight_outdim,
- self.sort_qkv_output_stats,
- ),
- attn_fc=QuantInput(
- self_attn.out_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.fc_in",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.fc_out",
- None,
- None,
- ),
- )
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """Returns the list of conversion information for modified layers in CodeGenForCausalLM."""
- convert_info_list = super().modified_layers_convert_info_list
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_2.weight"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.converter.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_2.bias"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.converter.ln_bias_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in CodeGenForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.attn.out_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.fc_out
-
- def get_tf_blocks(self, model: CodeGenForCausalLM) -> List[torch.nn.Module]:
-        """Returns the decoder layers (transformer blocks) in the model."""
- return list(model.transformer.h)
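
CodeGen's `reshape_qkv_weight` above relies on the fused `qkv_proj` weight being laid out as four equal blocks along the output dimension, each holding a Q, V, K slice in that order. A shape-only sketch of the same reshape, where the 4-way split mirrors the hard-coded factor in the hook above and the sizes are illustrative:

```python
import torch

hidden = 24
out_dim = 3 * hidden      # fused Q, V, K output dimension
num_blocks = 4            # mirrors the hard-coded 4-way split in the hook above

qkv_weight = torch.randn(out_dim, hidden)
blocks = qkv_weight.reshape(num_blocks, out_dim // num_blocks, hidden)
# Inside each block the order is Q, V, K (note: V comes before K).
q, v, k = torch.split(blocks, blocks.size(1) // 3, dim=1)
q, k, v = (t.reshape(-1, hidden) for t in (q, k, v))
print(q.shape, k.shape, v.shape)  # each is torch.Size([24, 24])
```
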
diff --git a/friendli/modules/quantizer/smoothquant/models/falcon.py b/friendli/modules/quantizer/smoothquant/models/falcon.py
deleted file mode 100644
index 7722f9ba..00000000
--- a/friendli/modules/quantizer/smoothquant/models/falcon.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli FalconForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Any, Dict, Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.falcon import ( # type: ignore[import]
- FalconConfig,
- FalconForCausalLM,
-)
-
-from friendli.modules.converter.base import OneOfConverter
-from friendli.modules.converter.utils import convert_to_gpt_j_params
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantFalconHook(SmoothQuantHook):
- """SmoothQuant Hook for FalconForCausalLM."""
-
- def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter):
- """Initialize SmoothQuantFalconHook."""
- super().__init__(quant_config, converter)
- config = cast(FalconConfig, converter.config)
- self.num_attention_heads = config.num_attention_heads
- self.hidden_size = config.hidden_size
- self.head_size = self.hidden_size // self.num_attention_heads
- self.rotary_dim = self.head_size
- self.num_kv_attention_heads = self.get_num_kv_attention_heads(config)
-
- def get_num_kv_attention_heads(self, config: FalconConfig) -> int:
- """Returns the number of key-value attention heads in FalconForCausalLM."""
- if config.new_decoder_architecture:
- if config.num_kv_heads is not None:
- return config.num_kv_heads
- return config.num_attention_heads
-
- if config.multi_query:
- return 1
-
- if config.num_kv_heads is not None:
- return config.num_kv_heads
- return config.num_attention_heads
-
- def iter_smooth_norm_weights(
- self,
- model: FalconForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in FalconForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr]
- if cast(FalconConfig, self.converter.config).new_decoder_architecture:
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.ln_attn.weight.data,
- decoder_layer.ln_attn.bias.data,
- ],
- [
- decoder_layer.self_attention.query_key_value.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical.
- )
- # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.ln_mlp.weight.data,
- decoder_layer.ln_mlp.bias.data,
- ],
- [decoder_layer.mlp.dense_h_to_4h.weight.data], # [OutDim, InDim]
- f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h",
- )
- else:
- # [LayerNorm 1] - [ QKV projection ] gets smoothed ( MLP FF1 is not smoothed. No LayerNorm 2. )
- yield (
- [
- decoder_layer.input_layernorm.weight.data,
- ],
- [
- decoder_layer.self_attention.query_key_value.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical.
- )
-
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.self_attention.dense.weight.data],
- f"{self.quantized_layer_prefix}{index}.self_attention.dense",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.mlp.dense_4h_to_h.weight.data],
- f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h",
- )
-
- def reshape_qkv_weight(
- self, attn_layer: torch.nn.Module
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
- """Reshapes the qkv weight in FalconForCausalLM for Quantization."""
- qkv_weight = cast(torch.nn.Linear, attn_layer.query_key_value).weight
- num_queries_per_kv = self.num_attention_heads // self.num_kv_attention_heads
-
- qkv_weight = qkv_weight.reshape(
- self.num_kv_attention_heads,
- num_queries_per_kv + 2,
- self.head_size,
- self.hidden_size,
- )
-
- q_weight = qkv_weight[:, :num_queries_per_kv].reshape(
- self.num_kv_attention_heads * num_queries_per_kv,
- self.head_size,
- self.hidden_size,
- )
- k_weight = qkv_weight[:, [-2]].reshape(
- self.num_kv_attention_heads,
- self.head_size,
- self.hidden_size,
- )
- v_weight = qkv_weight[:, [-1]].reshape(
- self.num_kv_attention_heads * self.head_size,
- self.hidden_size,
- )
-
- q_weight = convert_to_gpt_j_params(q_weight, self.rotary_dim)
- k_weight = convert_to_gpt_j_params(k_weight, self.rotary_dim)
-
- q_weight = q_weight.reshape(
- self.num_kv_attention_heads * num_queries_per_kv * self.head_size,
- self.hidden_size,
- )
- k_weight = k_weight.reshape(
- self.num_kv_attention_heads * self.head_size,
- self.hidden_size,
- )
-
- return q_weight, k_weight, v_weight
-
- def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor:
- """Sort max output stats of qkv_layer in FalconForCausalLM."""
- num_queries_per_kv = self.num_attention_heads // self.num_kv_attention_heads
- qkv_output_stat = max_output_stat.reshape(
- self.num_kv_attention_heads,
- num_queries_per_kv + 2,
- self.head_size,
- )
- q_out_stats = qkv_output_stat[:, :num_queries_per_kv].reshape(
- self.num_kv_attention_heads * num_queries_per_kv,
- self.head_size,
- )
- k_out_stats = qkv_output_stat[:, [-2]].reshape(
- self.num_kv_attention_heads,
- self.head_size,
- )
- v_out_stats = qkv_output_stat[:, [-1]].reshape(
- self.num_kv_attention_heads * self.head_size,
- )
- q_out_stats = convert_to_gpt_j_params(q_out_stats, self.rotary_dim)
- k_out_stats = convert_to_gpt_j_params(k_out_stats, self.rotary_dim)
- q_out_stats = q_out_stats.reshape(
- self.num_kv_attention_heads * num_queries_per_kv * self.head_size,
- )
- k_out_stats = k_out_stats.reshape(
- self.num_kv_attention_heads * self.head_size,
- )
-
- return torch.cat((q_out_stats, k_out_stats, v_out_stats), dim=0)
-
- def iter_tf_quant_inputs(self, model: FalconForCausalLM) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of FalconForCausalLM."""
- for index, decoder_layer in enumerate(model.transformer.h):
- self_attn = decoder_layer.self_attention
- q_weight, k_weight, v_weight = self.reshape_qkv_weight(self_attn)
- qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0)
- fc1 = decoder_layer.mlp.dense_h_to_4h
- fc2 = decoder_layer.mlp.dense_4h_to_h
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value",
- 0,
- q_weight.size(0),
- self.sort_qkv_output_stats,
- ),
- k=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value",
- q_weight.size(0),
- q_weight.size(0) + k_weight.size(0),
- self.sort_qkv_output_stats,
- ),
- v=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.query_key_value",
- q_weight.size(0) + k_weight.size(0),
- qkv_weight.size(0),
- self.sort_qkv_output_stats,
- ),
- attn_fc=QuantInput(
- self_attn.dense.weight,
- f"{self.quantized_layer_prefix}{index}.self_attention.dense",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in FalconForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.self_attention.dense
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.dense_4h_to_h
-
- def get_tf_blocks(self, model: FalconForCausalLM) -> List[torch.nn.Module]:
-        """Returns the decoder layers (transformer blocks) in the model."""
- return list(model.transformer.h)
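
`get_num_kv_attention_heads` above encodes how Falcon variants differ: grouped-query attention under the new decoder architecture, a single shared KV head under multi-query attention, and full multi-head attention otherwise. A standalone restatement of that decision logic using a stand-in for `FalconConfig` (the stub class and the example head counts are assumptions for illustration):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class FalconConfigStub:
    """Stand-in for the FalconConfig fields consulted by the hook."""

    num_attention_heads: int
    new_decoder_architecture: bool = False
    multi_query: bool = False
    num_kv_heads: Optional[int] = None


def num_kv_attention_heads(config: FalconConfigStub) -> int:
    explicit = config.num_kv_heads
    if config.new_decoder_architecture:
        return explicit if explicit is not None else config.num_attention_heads
    if config.multi_query:
        return 1  # multi-query attention shares a single KV head
    return explicit if explicit is not None else config.num_attention_heads


print(num_kv_attention_heads(FalconConfigStub(64, new_decoder_architecture=True, num_kv_heads=8)))  # 8
print(num_kv_attention_heads(FalconConfigStub(71, multi_query=True)))  # 1
print(num_kv_attention_heads(FalconConfigStub(32)))  # 32
```
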
diff --git a/friendli/modules/quantizer/smoothquant/models/gpt2.py b/friendli/modules/quantizer/smoothquant/models/gpt2.py
deleted file mode 100644
index 50a20695..00000000
--- a/friendli/modules/quantizer/smoothquant/models/gpt2.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPT2LMHeadModel QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.gpt2 import GPT2LMHeadModel # type: ignore[import]
-from transformers.pytorch_utils import Conv1D # type: ignore[import]
-
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantGPT2Hook(SmoothQuantHook):
- """SmoothQuant Hook for GPT2LMHeadModel."""
-
- def iter_smooth_norm_weights(
- self, model: GPT2LMHeadModel
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in GPT2LMHeadModel."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.ln_1.weight.data,
- decoder_layer.ln_1.bias.data,
- ],
- [
- decoder_layer.attn.c_attn.weight.data.transpose(
- 0, 1
- ), # [OutDim, InDim]
- ],
- f"{self.quantized_layer_prefix}{index}.attn.c_attn", # the input tensors fed into Q, K, V matrices are identical.
- )
- # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.ln_2.weight.data,
- decoder_layer.ln_2.bias.data,
- ],
- [decoder_layer.mlp.c_fc.weight.data.transpose(0, 1)], # [OutDim, InDim]
- f"{self.quantized_layer_prefix}{index}.mlp.c_fc",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data.transpose(0, 1)],
- [decoder_layer.attn.c_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.attn.c_proj",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data.transpose(0, 1)],
- [decoder_layer.mlp.c_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.mlp.c_proj",
- )
-
- def iter_tf_quant_inputs(self, model: GPT2LMHeadModel) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of GPT2LMHeadModel."""
- for index, decoder_layer in enumerate(model.transformer.h):
- attn = decoder_layer.attn
- attn_weight_outdim = attn.c_attn.nf # OutDim
- fc1 = decoder_layer.mlp.c_fc
- fc2 = decoder_layer.mlp.c_proj
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- attn.c_attn.weight.transpose(0, 1),
- f"{self.quantized_layer_prefix}{index}.attn.c_attn",
- 0,
- attn_weight_outdim // 3,
- ),
- k=QuantInput(
- attn.c_attn.weight.transpose(0, 1),
- f"{self.quantized_layer_prefix}{index}.attn.c_attn",
- attn_weight_outdim // 3,
- attn_weight_outdim // 3 * 2,
- ),
- v=QuantInput(
- attn.c_attn.weight.transpose(0, 1),
- f"{self.quantized_layer_prefix}{index}.attn.c_attn",
- attn_weight_outdim // 3 * 2,
- attn_weight_outdim,
- ),
- attn_fc=QuantInput(
- attn.c_proj.weight.transpose(0, 1),
- f"{self.quantized_layer_prefix}{index}.attn.c_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight.transpose(0, 1),
- f"{self.quantized_layer_prefix}{index}.mlp.c_fc",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight.transpose(0, 1),
- f"{self.quantized_layer_prefix}{index}.mlp.c_proj",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in GPT2LMHeadModel."""
- return (Conv1D,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.attn.c_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.c_proj
-
- def get_tf_blocks(self, model: GPT2LMHeadModel) -> List[torch.nn.Module]:
-        """Returns the decoder layers (transformer blocks) in the model."""
- return list(model.transformer.h)
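
The `.transpose(0, 1)` calls above exist because GPT-2 implements its projections with `transformers.pytorch_utils.Conv1D`, which stores its weight as `[in_features, out_features]`, the transpose of `torch.nn.Linear`. A small equivalence sketch with illustrative sizes:

```python
import torch
from transformers.pytorch_utils import Conv1D

in_features, out_features = 8, 16
conv = Conv1D(out_features, in_features)  # Conv1D(nf, nx): weight shape is [nx, nf] = [in, out]
linear = torch.nn.Linear(in_features, out_features)

# An equivalent nn.Linear is obtained by transposing Conv1D's weight to [out, in].
linear.weight.data.copy_(conv.weight.data.transpose(0, 1))
linear.bias.data.copy_(conv.bias.data)

x = torch.randn(2, in_features)
assert torch.allclose(conv(x), linear(x), atol=1e-6)
```
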
diff --git a/friendli/modules/quantizer/smoothquant/models/gpt_neox.py b/friendli/modules/quantizer/smoothquant/models/gpt_neox.py
deleted file mode 100644
index d2df5090..00000000
--- a/friendli/modules/quantizer/smoothquant/models/gpt_neox.py
+++ /dev/null
@@ -1,216 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPTNeoXForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Any, Dict, Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.gpt_neox import ( # type: ignore[import]
- GPTNeoXConfig,
- GPTNeoXForCausalLM,
-)
-
-from friendli.modules.converter.base import OneOfConverter
-from friendli.modules.converter.utils import convert_to_gpt_j_params
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantGPTNeoXHook(SmoothQuantHook):
- """SmoothQuant Hook for GPTNeoXForCausalLM."""
-
- def __init__(self, quant_config: Dict[str, Any], converter: OneOfConverter):
- """Initialize SmoothQuantGPTNeoXHook."""
- super().__init__(quant_config, converter)
- config = cast(GPTNeoXConfig, converter.config)
- self.num_attention_heads = config.num_attention_heads
- self.num_kv_attention_heads = config.num_attention_heads
- self.hidden_size = config.hidden_size
- self.head_size = self.hidden_size // self.num_attention_heads
- self.rotary_dim = int(self.head_size * config.rotary_pct)
-
- def iter_smooth_norm_weights(
- self,
- model: GPTNeoXForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in GPTNeoXForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- for index, decoder_layer in enumerate(model.gpt_neox.layers): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.input_layernorm.weight.data,
- decoder_layer.input_layernorm.bias.data,
- ],
- [
- decoder_layer.attention.query_key_value.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.attention.query_key_value", # the input tensors fed into Q, K, V matrices are identical.
- )
- # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.post_attention_layernorm.weight.data,
- decoder_layer.post_attention_layernorm.bias.data,
- ],
- [decoder_layer.mlp.dense_h_to_4h.weight.data], # [OutDim, InDim]
- f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.attention.dense.weight.data],
- f"{self.quantized_layer_prefix}{index}.attention.dense",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.mlp.dense_4h_to_h.weight.data],
- f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h",
- )
-
- def reshape_qkv_weight(
- self, attn_layer: torch.nn.Module
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
- """Reshape GPTNeoXForCausalLM's qkv weight for int8 quantization."""
- qkv_weight = cast(torch.nn.Linear, attn_layer).weight
- qkv_weight = qkv_weight.reshape(
- self.num_attention_heads,
- 3,
- self.head_size,
- self.hidden_size,
- )
-
- q_weight = qkv_weight[:, 0].reshape(
- self.num_attention_heads,
- self.head_size,
- self.hidden_size,
- )
- k_weight = qkv_weight[:, 1].reshape(
- self.num_attention_heads,
- self.head_size,
- self.hidden_size,
- )
- v_weight = qkv_weight[:, 2].reshape(
- self.num_attention_heads * self.head_size,
- self.hidden_size,
- )
-
- q_weight = convert_to_gpt_j_params(param=q_weight, rotary_dim=self.rotary_dim)
- k_weight = convert_to_gpt_j_params(param=k_weight, rotary_dim=self.rotary_dim)
- q_weight = q_weight.reshape(
- self.num_attention_heads * self.head_size,
- self.hidden_size,
- )
- k_weight = k_weight.reshape(
- self.num_attention_heads * self.head_size,
- self.hidden_size,
- )
- return q_weight, k_weight, v_weight
-
- def sort_qkv_output_stats(self, max_output_stat: torch.Tensor) -> torch.Tensor:
- """Sort max output stats of qkv_layer in GPTNeoXForCausalLM."""
- max_output_stat = max_output_stat.reshape(
- self.num_attention_heads,
- 3,
- self.head_size,
- )
- q_output_stat = max_output_stat[:, 0].reshape(
- self.num_attention_heads,
- self.head_size,
- )
- k_output_stat = max_output_stat[:, 1].reshape(
- self.num_attention_heads,
- self.head_size,
- )
- v_output_stat = max_output_stat[:, 2].reshape(
- self.num_attention_heads * self.head_size,
- )
- q_output_stat = convert_to_gpt_j_params(q_output_stat, self.rotary_dim)
- k_output_stat = convert_to_gpt_j_params(k_output_stat, self.rotary_dim)
- q_output_stat = q_output_stat.reshape(
- self.num_attention_heads * self.head_size,
- )
- k_output_stat = k_output_stat.reshape(
- self.num_attention_heads * self.head_size,
- )
- return torch.cat((q_output_stat, k_output_stat, v_output_stat), dim=0)
-
- def iter_tf_quant_inputs(
- self, model: GPTNeoXForCausalLM
- ) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of GPTNeoXForCausalLM."""
- for index, decoder_layer in enumerate(model.gpt_neox.layers):
- attention = decoder_layer.attention
- attention_weight_outdim = attention.query_key_value.weight.size(0) # OutDim
- q_weight, k_weight, v_weight = self.reshape_qkv_weight(
- attention.query_key_value
- )
- qkv_weight = torch.cat((q_weight, k_weight, v_weight), dim=0)
- fc1 = decoder_layer.mlp.dense_h_to_4h
- fc2 = decoder_layer.mlp.dense_4h_to_h
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attention.query_key_value",
- 0,
- attention_weight_outdim // 3,
- self.sort_qkv_output_stats,
- ),
- k=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attention.query_key_value",
- attention_weight_outdim // 3,
- attention_weight_outdim // 3 * 2,
- self.sort_qkv_output_stats,
- ),
- v=QuantInput(
- qkv_weight,
- f"{self.quantized_layer_prefix}{index}.attention.query_key_value",
- attention_weight_outdim // 3 * 2,
- attention_weight_outdim,
- self.sort_qkv_output_stats,
- ),
- attn_fc=QuantInput(
- attention.dense.weight,
- f"{self.quantized_layer_prefix}{index}.attention.dense",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_h_to_4h",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.dense_4h_to_h",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in GPTNeoXForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.attention.dense
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.dense_4h_to_h
-
- def get_tf_blocks(self, model: GPTNeoXForCausalLM) -> List[torch.nn.Module]:
-        """Returns the decoder layers (transformer blocks) in the model."""
- return list(model.gpt_neox.layers)
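
GPT-NeoX's fused `query_key_value` weight is treated above as `[num_heads, 3, head_size, hidden]`, with Q, K, and V interleaved inside each head; the rotary reordering is then delegated to the repo's `convert_to_gpt_j_params` helper. A shape-only sketch of the interleaved slicing, with the rotary step omitted and illustrative sizes:

```python
import torch

num_heads, head_size, hidden = 4, 6, 24
qkv = torch.randn(num_heads * 3 * head_size, hidden)

per_head = qkv.reshape(num_heads, 3, head_size, hidden)
q = per_head[:, 0].reshape(num_heads * head_size, hidden)
k = per_head[:, 1].reshape(num_heads * head_size, hidden)
v = per_head[:, 2].reshape(num_heads * head_size, hidden)
print(q.shape, k.shape, v.shape)  # each is [num_heads * head_size, hidden]
```
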
diff --git a/friendli/modules/quantizer/smoothquant/models/gptj.py b/friendli/modules/quantizer/smoothquant/models/gptj.py
deleted file mode 100644
index 77e15732..00000000
--- a/friendli/modules/quantizer/smoothquant/models/gptj.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli GPTJForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-import copy
-from typing import Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.gptj import GPTJForCausalLM # type: ignore[import]
-
-from friendli.modules.converter.base import DECODER_PREFIX
-from friendli.modules.converter.schema import ConvertInfo
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantGPTJHook(SmoothQuantHook):
- """SmoothQuant Hook for GPTJForCausalLM."""
-
- def pre_smooth(self, model: torch.nn.Module) -> torch.nn.Module:
- """Pre-procedure for SmoothQuant in GPTJForCausalLM that should be called before smooth() is called."""
- super().pre_smooth(model)
- for decoder_layer in cast(GPTJForCausalLM, model).transformer.h:
- decoder_layer.add_module("ln_2", copy.deepcopy(decoder_layer.ln_1))
- return model
-
- def iter_smooth_norm_weights(
- self,
- model: GPTJForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in GPTJForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- for index, decoder_layer in enumerate(model.transformer.h): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection] gets smoothed
- yield (
- [
- decoder_layer.ln_1.weight.data,
- decoder_layer.ln_1.bias.data,
- ],
- [
- decoder_layer.attn.q_proj.weight.data, # [OutDim, InDim]
- decoder_layer.attn.k_proj.weight.data, # [OutDim, InDim]
- decoder_layer.attn.v_proj.weight.data, # [OutDim, InDim]
- ],
- f"{self.quantized_layer_prefix}{index}.attn.q_proj", # the input tensors fed into Q, K, V matrices are identical.
- )
-            # [LayerNorm 2 (copy of LayerNorm 1)] - [ MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.ln_2.weight.data,
- decoder_layer.ln_2.bias.data,
- ],
- [
- decoder_layer.mlp.fc_in.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.mlp.fc_in",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.attn.out_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.mlp.fc_out.weight.data],
- f"{self.quantized_layer_prefix}{index}.mlp.fc_out",
- )
-
- def iter_tf_quant_inputs(self, model: GPTJForCausalLM) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of GPTJForCausalLM."""
- for index, decoder_layer in enumerate(model.transformer.h):
- attn = decoder_layer.attn
- fc1 = decoder_layer.mlp.fc_in
- fc2 = decoder_layer.mlp.fc_out
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- attn.q_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.q_proj",
- None,
- None,
- ),
- k=QuantInput(
- attn.k_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.k_proj",
- None,
- None,
- ),
- v=QuantInput(
- attn.v_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.v_proj",
- None,
- None,
- ),
- attn_fc=QuantInput(
- attn.out_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.fc_in",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.fc_out",
- None,
- None,
- ),
- )
-
- @property
- def modified_layers_convert_info_list(
- self,
- ) -> List[ConvertInfo]:
-        """Returns the modified layers' convert info list in GPTJForCausalLM."""
- convert_info_list = super().modified_layers_convert_info_list
-
- for i in range(self.converter.decoder_layer_num):
- layer_prefix = f"{self.quantized_layer_prefix}{i}."
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- convert_info_list.extend(
- [
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_2.weight"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}ln_2/gamma:0",
- reshape_fn=self.converter.ln_weight_reshape,
- ),
- ConvertInfo(
- param_names=[f"{layer_prefix}ln_2.bias"],
- data_type=self.converter.data_type,
- converted_name=f"{converted_prefix}ln_2/beta:0",
- reshape_fn=self.converter.ln_bias_reshape,
- ),
- ]
- )
-
- return convert_info_list
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in GPTJForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.attn.out_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.fc_out
-
- def get_tf_blocks(self, model: GPTJForCausalLM) -> List[torch.nn.Module]:
-        """Returns the decoder layers (transformer blocks) in the model."""
- return list(model.transformer.h)
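
Like CodeGen, GPT-J feeds a single LayerNorm output into both the attention projections and the MLP, so `pre_smooth` clones `ln_1` into a new `ln_2` module and the smoothing scales for `fc_in` are folded into that copy. A toy sketch of the same duplication on a minimal stand-in block (the `ToyBlock` class is an illustrative assumption):

```python
import copy

import torch


class ToyBlock(torch.nn.Module):
    """Toy stand-in for a GPT-J decoder block with a single shared LayerNorm."""

    def __init__(self, hidden: int):
        super().__init__()
        self.ln_1 = torch.nn.LayerNorm(hidden)
        self.attn = torch.nn.Linear(hidden, 3 * hidden)  # fused QKV stand-in
        self.mlp_fc_in = torch.nn.Linear(hidden, 4 * hidden)


block = ToyBlock(8)
# Clone the shared norm so the MLP branch can absorb smoothing scales independently
# of the attention branch, mirroring what pre_smooth() does with ln_1 -> ln_2.
block.add_module("ln_2", copy.deepcopy(block.ln_1))
assert torch.equal(block.ln_2.weight, block.ln_1.weight)
```
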
diff --git a/friendli/modules/quantizer/smoothquant/models/llama.py b/friendli/modules/quantizer/smoothquant/models/llama.py
deleted file mode 100644
index 5256401a..00000000
--- a/friendli/modules/quantizer/smoothquant/models/llama.py
+++ /dev/null
@@ -1,239 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli LlamaForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-import copy
-from dataclasses import dataclass
-from typing import Any, Dict, Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.llama import ( # type: ignore[import]
- LlamaConfig,
- LlamaForCausalLM,
-)
-
-from friendli.modules.converter.base import DECODER_PREFIX, OneOfConverter
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import (
- ModuleName,
- QuantInput,
- TFQuantInputs,
- TFQuantResults,
- WeightActQuantResult,
-)
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-from friendli.modules.quantizer.utils import get_weight_act_quant_scales
-
-
-@dataclass
-class LlamaTFQuantInput(TFQuantInputs):
-    """Dataclass for int8 quantization input per layer in LlamaForCausalLM."""
-
- ff_gate: QuantInput
-
-
-@dataclass
-class LlamaTFQuantResults(TFQuantResults):
-    """Dataclass for int8 quantization result per transformer block in LlamaForCausalLM."""
-
- ff_gate: WeightActQuantResult
-
-
-class SmoothQuantLlamaHook(SmoothQuantHook):
- """SmoothQuant Hook for LlamaForCausalLM."""
-
- def __init__(self, quant_config: SmoothQuantConfig, converter: OneOfConverter):
- """Initialize SmoothQuantLlamaHook."""
- super().__init__(quant_config, converter)
- config = cast(LlamaConfig, converter.config)
- self.num_attention_heads = config.num_attention_heads
- if config.num_key_value_heads is None:
- self.num_kv_attention_heads = self.num_attention_heads
- else:
- self.num_kv_attention_heads = config.num_key_value_heads
- self.hidden_size = config.hidden_size
- self.head_size = self.hidden_size // self.num_attention_heads
- self.rotary_dim = self.head_size
-
- def pre_smooth(self, model: torch.nn.Module) -> torch.nn.Module:
- """Pre-procedure for SmoothQuant in LlamaForCausalLM that should be called before smooth() is called."""
- super().pre_smooth(model)
- for decoder_layer in cast(LlamaForCausalLM, model).model.layers:
- decoder_layer.add_module(
- "post_attention_layernorm_2",
- copy.deepcopy(decoder_layer.post_attention_layernorm),
- )
- return model
-
- def iter_smooth_norm_weights(
- self,
- model: LlamaForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in LlamaForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
-
- for index, decoder_layer in enumerate(model.model.layers): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.input_layernorm.weight.data,
- ],
- [
- decoder_layer.self_attn.q_proj.weight.data,
- decoder_layer.self_attn.k_proj.weight.data,
- decoder_layer.self_attn.v_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", # the input tensors fed into Q, K, V matrices are identical.
- )
- # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.post_attention_layernorm.weight.data,
- ],
- [
- decoder_layer.mlp.up_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.mlp.up_proj",
- )
-            # [LayerNorm 2] - [ MLP Gated FF ] gets smoothed
- yield (
- [
- decoder_layer.post_attention_layernorm_2.weight.data,
- ],
- [
- decoder_layer.mlp.gate_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.mlp.gate_proj",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.self_attn.o_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- )
-
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.mlp.down_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.mlp.down_proj",
- )
-
- def iter_tf_quant_inputs(self, model: LlamaForCausalLM) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of LlamaForCausalLM."""
- for index, decoder_layer in enumerate(model.model.layers):
- self_attn = decoder_layer.self_attn
- fc1 = decoder_layer.mlp.up_proj
- ff_gate = decoder_layer.mlp.gate_proj
- fc2 = decoder_layer.mlp.down_proj
-
- yield LlamaTFQuantInput(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- self_attn.q_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj",
- None,
- None,
- ),
- k=QuantInput(
- self_attn.k_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.k_proj",
- None,
- None,
- ),
- v=QuantInput(
- self_attn.v_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.v_proj",
- None,
- None,
- ),
- attn_fc=QuantInput(
- self_attn.o_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.up_proj",
- None,
- None,
- ),
- ff_gate=QuantInput(
- ff_gate.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.gate_proj",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.mlp.down_proj",
- None,
- None,
- ),
- )
-
- def get_quant_result(
- self,
- quant_input: TFQuantInputs,
- **kwargs: Any,
- ) -> TFQuantResults:
- """Returns the quantization result for a specific layer in LlamaForCausalLM."""
- max_input_stats: Dict[ModuleName, torch.Tensor] = kwargs["max_input_stats"]
- max_output_stats: Dict[ModuleName, torch.Tensor] = kwargs["max_output_stats"]
-
- def get_scale(quant_input: QuantInput) -> WeightActQuantResult:
- weight, name, start, end = (
- quant_input.weight,
- quant_input.name,
- quant_input.start_offset,
- quant_input.end_offset,
- )
- return get_weight_act_quant_scales(
- name,
- max_input_stats[name],
- weight[start:end],
- max_output_stats[name][start:end],
- )
-
- quant_input = cast(LlamaTFQuantInput, quant_input)
- return LlamaTFQuantResults(
- layer_prefix_with_index=f"{self.quantized_layer_prefix}{quant_input.layer_index}.",
- q=get_scale(quant_input.q),
- k=get_scale(quant_input.k),
- v=get_scale(quant_input.v),
- attn_fc=get_scale(quant_input.attn_fc),
- ff1=get_scale(quant_input.ff1),
- ff_gate=get_scale(quant_input.ff_gate),
- ff2=get_scale(quant_input.ff2),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in LlamaForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.self_attn.o_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.down_proj
-
- def get_tf_blocks(self, model: LlamaForCausalLM) -> List[torch.nn.Module]:
-        """Returns the decoder layers (transformer blocks) in the model."""
- return list(model.model.layers)
-
- @property
- def quantized_param_names(self) -> List[str]:
- """Returns the parameter names in LlamaForCausalLM."""
- param_names = super().quantized_param_names
- for i in range(self.converter.decoder_layer_num):
- converted_prefix = f"{DECODER_PREFIX}/h_._{i}/"
- param_names.append(f"{converted_prefix}mlp/c_gate/weight:0")
- return param_names
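
Llama's MLP is gated (`up_proj`, `gate_proj`, `down_proj`), and both `up_proj` and `gate_proj` consume the same `post_attention_layernorm` output; the hook therefore clones that norm in `pre_smooth` and adds an extra `ff_gate` entry to the quantization dataclasses. A toy sketch of the gated-MLP data flow this targets (dimensions and module names are illustrative):

```python
import torch
import torch.nn.functional as F

hidden, intermediate = 8, 32
norm = torch.nn.LayerNorm(hidden)  # Llama uses RMSNorm; LayerNorm stands in here
up_proj = torch.nn.Linear(hidden, intermediate, bias=False)
gate_proj = torch.nn.Linear(hidden, intermediate, bias=False)
down_proj = torch.nn.Linear(intermediate, hidden, bias=False)

x = torch.randn(2, hidden)
h = norm(x)
# Both projections read the same normalized activations, so smoothing each with its
# own scales requires its own copy of the norm to fold those scales into.
mlp_out = down_proj(F.silu(gate_proj(h)) * up_proj(h))
print(mlp_out.shape)  # torch.Size([2, 8])
```
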
diff --git a/friendli/modules/quantizer/smoothquant/models/mpt.py b/friendli/modules/quantizer/smoothquant/models/mpt.py
deleted file mode 100644
index a72561fd..00000000
--- a/friendli/modules/quantizer/smoothquant/models/mpt.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli MPTForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type, cast
-
-import torch
-
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantMPTHook(SmoothQuantHook):
- """SmoothQuant Hook for MPTForCausalLM."""
-
- def iter_smooth_norm_weights(
- self,
- model: torch.nn.Module,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in MPTForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
-
- for index, decoder_layer in enumerate(
- model.transformer.blocks # type: ignore[union-attr, arg-type]
- ):
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [decoder_layer.norm_1.weight.data],
- [decoder_layer.attn.Wqkv.weight.data],
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- )
- # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed
- yield (
- [decoder_layer.norm_2.weight.data],
- [decoder_layer.ffn.up_proj.weight.data], # [OutDim, InDim]
- f"{self.quantized_layer_prefix}{index}.ffn.up_proj",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.attn.out_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.ffn.down_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.ffn.down_proj",
- )
-
- def iter_tf_quant_inputs(self, model: torch.nn.Module) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of MPTForCausalLM."""
- for index, decoder_layer in enumerate(
- model.transformer.blocks # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.attn
- q_outdim = (
- self.converter.decoder_num_attention_heads
- * self.converter.decoder_head_size
- )
- kv_outdim = (
- self.converter.decoder_num_kv_attention_heads
- * self.converter.decoder_head_size
- )
- qkv_outdim = self_attn.Wqkv.weight.size(0)
- assert qkv_outdim == q_outdim + kv_outdim * 2
- fc1 = decoder_layer.ffn.up_proj
- fc2 = decoder_layer.ffn.down_proj
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- self_attn.Wqkv.weight,
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- 0,
- q_outdim,
- ),
- k=QuantInput(
- self_attn.Wqkv.weight,
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- q_outdim,
- q_outdim + kv_outdim,
- ),
- v=QuantInput(
- self_attn.Wqkv.weight,
- f"{self.quantized_layer_prefix}{index}.attn.Wqkv",
- q_outdim + kv_outdim,
- qkv_outdim,
- ),
- attn_fc=QuantInput(
- self_attn.out_proj.weight,
- f"{self.quantized_layer_prefix}{index}.attn.out_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.ffn.up_proj",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.ffn.down_proj",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in MPTForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.attn.out_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.ffn.down_proj
-
- def get_tf_blocks(self, model: torch.nn.Module) -> List[torch.nn.Module]:
- """Returns the decoder layers(transformer blocks) in the model."""
- return list(model.transformer.blocks)
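
The MPT hook above carves Q, K, and V out of the fused `Wqkv` weight using the attention head geometry. A minimal, self-contained sketch of that offset arithmetic on a toy tensor (all dimensions here are made up for illustration; they are not MPT defaults):

```python
import torch

# Toy geometry (assumed for illustration, not MPT defaults):
# 4 query heads, 2 KV heads, head size 8, hidden size 64.
num_heads, num_kv_heads, head_size, in_dim = 4, 2, 8, 64
q_outdim = num_heads * head_size        # 32
kv_outdim = num_kv_heads * head_size    # 16

# A fused QKV projection weight laid out as [q; k; v] along the output dim.
wqkv = torch.randn(q_outdim + 2 * kv_outdim, in_dim)

# The same start/end offsets the hook passes to QuantInput.
q_w = wqkv[0:q_outdim]
k_w = wqkv[q_outdim:q_outdim + kv_outdim]
v_w = wqkv[q_outdim + kv_outdim:q_outdim + 2 * kv_outdim]

assert q_w.shape == (32, 64) and k_w.shape == (16, 64) and v_w.shape == (16, 64)
```
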
diff --git a/friendli/modules/quantizer/smoothquant/models/opt.py b/friendli/modules/quantizer/smoothquant/models/opt.py
deleted file mode 100644
index ed6d8292..00000000
--- a/friendli/modules/quantizer/smoothquant/models/opt.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli OPTForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers.models.opt import OPTForCausalLM # type: ignore[import]
-
-from friendli.modules.quantizer.schema.config import SmoothQuantConfig
-from friendli.modules.quantizer.schema.data import ModuleName, QuantInput, TFQuantInputs
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantHook
-
-
-class SmoothQuantOPTHook(SmoothQuantHook):
- """SmoothQuant Hook for OPTForCausalLM."""
-
- def iter_smooth_norm_weights(
- self, model: OPTForCausalLM
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of layernorm's weight and linear layer's weight per transformer block in OPTForCausalLM."""
- quant_args = cast(SmoothQuantConfig, self.quant_config).smoothquant_args
- for index, decoder_layer in enumerate(model.model.decoder.layers): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.self_attn_layer_norm.weight.data,
- decoder_layer.self_attn_layer_norm.bias.data,
- ],
- [
- decoder_layer.self_attn.q_proj.weight.data,
- decoder_layer.self_attn.k_proj.weight.data,
- decoder_layer.self_attn.v_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", # the input tensors fed into Q, K, V matrices are identical.
- )
- # [LayerNorm 2] - [ MLP FF 1 ] gets smoothed
- yield (
- [
- decoder_layer.final_layer_norm.weight.data,
- decoder_layer.final_layer_norm.bias.data,
- ],
- [decoder_layer.fc1.weight.data],
- f"{self.quantized_layer_prefix}{index}.fc1",
- )
- if quant_args.attn_fc_smoothing:
- yield (
- [decoder_layer.attn_fc_pre_smoother.scale.data],
- [decoder_layer.self_attn.out_proj.weight.data],
- f"{self.quantized_layer_prefix}{index}.self_attn.out_proj",
- )
- if quant_args.ff2_smoothing:
- yield (
- [decoder_layer.ff2_pre_smoother.scale.data],
- [decoder_layer.fc2.weight.data],
- f"{self.quantized_layer_prefix}{index}.fc2",
- )
-
- def iter_tf_quant_inputs(self, model: OPTForCausalLM) -> Iterator[TFQuantInputs]:
- """Returns the layers which should be quantized in transformer block of OPTForCausalLM."""
- for index, decoder_layer in enumerate(model.model.decoder.layers):
- self_attn = decoder_layer.self_attn
- fc1 = decoder_layer.fc1
- fc2 = decoder_layer.fc2
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- q=QuantInput(
- self_attn.q_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj",
- None,
- None,
- ),
- k=QuantInput(
- self_attn.k_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.k_proj",
- None,
- None,
- ),
- v=QuantInput(
- self_attn.v_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.v_proj",
- None,
- None,
- ),
- attn_fc=QuantInput(
- self_attn.out_proj.weight,
- f"{self.quantized_layer_prefix}{index}.self_attn.out_proj",
- None,
- None,
- ),
- ff1=QuantInput(
- fc1.weight,
- f"{self.quantized_layer_prefix}{index}.fc1",
- None,
- None,
- ),
- ff2=QuantInput(
- fc2.weight,
- f"{self.quantized_layer_prefix}{index}.fc2",
- None,
- None,
- ),
- )
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Returns the linear layer types in OPTForCausalLM."""
- return (torch.nn.Linear,)
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after attention in the decoder layer."""
- return decoder_layer.self_attn.out_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the linear layer after FF1 in the decoder layer."""
- return decoder_layer.fc2
-
- def get_tf_blocks(self, model: OPTForCausalLM) -> List[torch.nn.Module]:
- """Returns the decoder layers(transformer blocks) in the model."""
- return list(model.model.decoder.layers)
diff --git a/friendli/modules/quantizer/utils.py b/friendli/modules/quantizer/utils.py
deleted file mode 100644
index 1e47030b..00000000
--- a/friendli/modules/quantizer/utils.py
+++ /dev/null
@@ -1,514 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Quantizer Utils."""
-
-from __future__ import annotations
-
-import os
-from contextlib import contextmanager
-from itertools import islice
-from typing import (
- Any,
- Callable,
- Dict,
- Iterable,
- Iterator,
- List,
- Protocol,
- Sequence,
- Tuple,
- Type,
- TypeVar,
- Union,
-)
-
-import datasets # type: ignore[import]
-import torch
-from accelerate import cpu_offload_with_hook # type: ignore
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-
-from friendli.enums import ModelDataType
-from friendli.errors import InvalidConfigError, QuantizationError
-from friendli.logging import logger
-from friendli.modules.quantizer.schema.config import CalibrationDatasetConfig
-from friendli.modules.quantizer.schema.data import (
- ModuleName,
- WeightActQuantResult,
- WeightOnlyQuantResult,
-)
-
-
-def scale_reshape(
- params: List[torch.Tensor],
-) -> torch.Tensor:
- """Reshape scale/zero of quantized layers."""
- if len(params) == 1:
- t = params[0]
- else:
- t = torch.cat(params, dim=1)
- return t
-
-
-def quantized_qkv_weight_reshape(
- params: List[torch.Tensor],
-) -> torch.Tensor:
- """Reshape weight of quantized qkv layers."""
- assert len(params) == 3
- qkv_weight = torch.concat(
- params,
- dim=0,
- ) # [OutDim, InDim]
-
- return qkv_weight.to(torch.uint8)
-
-
-def quantized_linear_weight_reshape(
- params: List[torch.Tensor],
-) -> torch.Tensor:
- """Reshape weight of quantized linear layers."""
- assert len(params) == 1
-
- return params[0].to(torch.uint8)
-
-
-def safe_load_datasets(data_cfg: CalibrationDatasetConfig) -> datasets.Dataset:
- """Load dataset from calibration dataset config."""
- data_path = data_cfg.path_or_name
- data_split = data_cfg.split
-
- try:
- if os.path.exists(data_path):
- dataset = datasets.load_dataset(
- data_cfg.format,
- data_files=data_path,
- split=data_split,
- )
- else:
- data_name_parts = data_path.split(":")
- if len(data_name_parts) == 1:
- dataset = datasets.load_dataset(data_path, split=data_split)
- elif len(data_name_parts) == 2:
- data_name, subset_name = data_name_parts
- dataset = datasets.load_dataset(
- data_name, subset_name, split=data_split
- )
- else:
- raise InvalidConfigError(
- "Dataset name is in invalid format. "
- "(valid format: '' or ':')"
- )
- except ValueError as err:
- raise QuantizationError(f"datasets.load_dataset failed. {str(err)}") from err
-
- if not isinstance(dataset, datasets.Dataset):
- raise InvalidConfigError(
- "This dataset format is not supported for the calibration."
- )
-
- return dataset
-
-
-T = TypeVar("T")
-
-
-def batched(it: Iterator[T], n: int) -> Iterator[List[T]]:
- """Batch an iterator into lists of size n."""
- # batched('ABCDEFG', 3) --> ABC DEF G
- while True:
- batch = list(islice(it, n))
- if not batch:
- return
- yield batch
-
-
-def build_percentile_statistics(
- scale_percentile: float,
- symmetric: bool = True,
-) -> Tuple[Callable, Callable, Callable]:
- """Builds the hooks for getting the max input and output activations of a model."""
- logger.info(
- "Building percentile statistics hooks. scale_percentile: (%s)",
- scale_percentile,
- )
-
- max_input_M1: Dict[str, torch.Tensor] = {}
- max_input_M2: Dict[str, torch.Tensor] = {}
- max_input_num: Dict[str, torch.Tensor] = {}
- max_output_M1: Dict[str, torch.Tensor] = {}
- max_output_M2: Dict[str, torch.Tensor] = {}
- max_output_num: Dict[str, torch.Tensor] = {}
-
- def create_hook(name: ModuleName):
- def update_stats(
- max_M1: Dict[str, torch.Tensor],
- max_M2: Dict[str, torch.Tensor],
- max_num: Dict[str, int],
- new_t: torch.Tensor,
- ) -> None:
- # Chan's method for computing mean and variance incrementally
- new_t = new_t.detach().reshape(-1, new_t.size(-1))
- new_numel = new_t.size(0)
- new_t_M1 = new_t.to(torch.float64).mean(dim=0)
- if symmetric:
- # it is assumed samples are always centered on zero
- # in the symmetric quantization scheme
- new_t_M1.zero_()
- new_t_M2 = ((new_t.to(torch.float64) - new_t_M1) ** 2).sum(dim=0)
- try:
- pre_numel = max_num[name]
- max_num[name] += new_numel
- delta = new_t_M1 - max_M1[name]
- max_M1[name] += delta * (new_numel / max_num[name])
- max_M2[name] += new_t_M2 + torch.pow(delta, 2) * (
- pre_numel * new_numel / max_num[name]
- )
- except KeyError:
- max_num[name] = new_numel
- max_M1[name] = new_t_M1
- max_M2[name] = new_t_M2
-
- def hook(module, in_t_tup, out_t): # pylint: disable=unused-argument
- with torch.no_grad():
- in_t = in_t_tup[0]
- update_stats(max_input_M1, max_input_M2, max_input_num, in_t)
- update_stats(max_output_M1, max_output_M2, max_output_num, out_t)
-
- return hook
-
- def finish_input_stats():
- return {
- name: torch.distributions.Normal(
- loc=max_input_M1[name],
- scale=torch.sqrt(max_input_M2[name] / max_input_num[name]).clip(
- min=1e-7
- ),
- ).icdf(
- torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to(
- max_input_M1[name].device
- )
- )
- for name in list(max_input_M1.keys())
- }
-
- def finish_output_stats():
- return {
- name: torch.distributions.Normal(
- loc=max_output_M1[name],
- scale=torch.sqrt(max_output_M2[name] / max_output_num[name]).clip(
- min=1e-7
- ),
- ).icdf(
- torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to(
- max_output_M1[name].device
- )
- )
- for name in list(max_output_M1.keys())
- }
-
- return finish_input_stats, finish_output_stats, create_hook
-
-
-def build_max_statistics() -> Tuple[Callable, Callable, Callable]:
- """Builds the hooks for getting the max input and output activations of a model."""
- logger.info("Building max statistics hooks")
- max_input_stats: Dict[str, torch.Tensor] = {}
- max_output_stats: Dict[str, torch.Tensor] = {}
-
- def create_hook(name: ModuleName):
- def hook(modules, in_t_tup, out_t): # pylint: disable=unused-argument
- in_t = in_t_tup[0]
- in_t = (
- in_t.detach().abs().reshape(-1, in_t.size(-1)).max(dim=0).values
- ) # reduce-max only leaving the hidden dim (supposing the last dim is the hidden dim)
- out_t = out_t.detach().reshape(-1, out_t.size(-1))
- out_t = out_t.abs().max(dim=0).values
- try:
- max_input_stats[name] = torch.maximum(max_input_stats[name], in_t)
- except KeyError:
- max_input_stats[name] = in_t
- try:
- max_output_stats[name] = torch.maximum(max_output_stats[name], out_t)
- except KeyError:
- max_output_stats[name] = out_t
-
- return hook
-
- def finish_input_stats():
- return max_input_stats
-
- def finish_output_stats():
- return max_output_stats
-
- return finish_input_stats, finish_output_stats, create_hook
-
-
-@torch.no_grad()
-def collect_stats(
- model: torch.nn.Module,
- device: str,
- dataset: datasets.Dataset,
- target_classes: Tuple[Type[torch.nn.Module], ...],
- tqdm_desc: str,
- percentile: float,
- batch_size: int = 1,
-) -> Tuple[Dict[ModuleName, torch.Tensor], Dict[ModuleName, torch.Tensor]]:
- """Collects the maximum values of input and output activations of a specific model.
-
- Args:
- model (torch.nn.Module): The model for which we want to collect the max statistics.
- dataset (Dataset): Dataset that contains input tensors.
- target_classes (Tuple[Type[torch.nn.Module], ...]): A tuple of the target classes.
-
- Returns:
- A tuple of two dictionaries: (max_input_stats, max_output_stats), where:
- max_input_stats: The maximum input activation values for each module of the model.
- max_output_stats: The maximum output activation values for each module of the model.
-
- This function uses a forward hook to capture the maximum input and output activation values
- of the specified target_classes. The batch_size parameter controls the size of the input
- batches that are passed through the model.
-
- The function returns two dictionaries containing the maximum input and output activation
- values for each module of the model, respectively. These dictionaries can be used to calculate
- scaling factors for weight quantization and activation smoothing.
-
- """
- # pylint: disable=too-many-locals
- max_input_stats, max_output_stats, create_hook = (
- build_percentile_statistics(percentile)
- if percentile < 100.0
- else build_max_statistics()
- )
- name_mods = [
- (name, module)
- for name, module in model.named_modules()
- if isinstance(module, target_classes)
- ]
-
- calib_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
- removables = []
- for name, module in name_mods:
- removables.append(module.register_forward_hook(create_hook(name)))
- try:
- for inputs in tqdm(calib_dataloader, desc=tqdm_desc):
- model(inputs.to(device))
- finally:
- for removable in removables:
- removable.remove()
- return max_input_stats(), max_output_stats()
-
-
-def build_inps_hook():
- """Builds the hooks for getting the input and output activations of a module."""
- args_dict = {}
- kwargs_dict = {}
-
- def create_hook(name: ModuleName):
- def hook(m, args, kwargs, y): # pylint: disable=unused-argument
- assert name not in args_dict
- assert name not in kwargs_dict
- # assumption: all positional arguments are torch.Tensor
- args_dict[name] = [t.detach() for t in args]
- kwargs_dict[name] = {
- k: (v.detach() if isinstance(v, torch.Tensor) else v)
- for k, v in kwargs.items()
- }
-
- return hook
-
- return args_dict, kwargs_dict, create_hook
-
-
-def collect_inps(
- module: torch.nn.Module,
- module_args: Tuple[Any, ...],
- module_kwargs: Dict[str, Any],
- device: str,
- target_classes: Tuple[Type[torch.nn.Module], ...],
-) -> Tuple[Dict[ModuleName, Tuple[Any]], Dict[ModuleName, Dict[str, Any]]]:
- """Collects concated input and output activations of a specific module."""
- args_dict, kwargs_dict, create_hook = build_inps_hook()
- name_mods = [
- (name, m) for name, m in module.named_modules() if isinstance(m, target_classes)
- ]
-
- removables = []
- for name, m in name_mods:
- removables.append(m.register_forward_hook(create_hook(name), with_kwargs=True))
-
- module(
- *((t.to(device) if isinstance(t, torch.Tensor) else t) for t in module_args),
- **{
- k: (v.to(device) if isinstance(v, torch.Tensor) else v)
- for k, v in module_kwargs.items()
- },
- )
-
- for removable in removables:
- removable.remove()
-
- return args_dict, kwargs_dict
-
-
-def get_torch_quant_dtype(q_bit: int = 8):
- """Get torch quant data type from quant bit."""
- if q_bit == 8:
- return torch.int8
- if q_bit == 4:
- return torch.int32 # In AWQ, we use int32 to represent int4
- raise ValueError(f"Invalid quant bit: {q_bit}")
-
-
-@torch.no_grad()
-def get_weight_act_quant_scales(
- layer_name: str,
- input_max: torch.Tensor,
- target_weight: torch.Tensor,
- weight: torch.Tensor,
- output_max: torch.Tensor,
- device: str = "cpu",
- quant_dtype: ModelDataType = ModelDataType.INT8,
-) -> WeightActQuantResult:
- """Get the quantization scales and int8 weight for a specific layer."""
- # shape of input_max: [InChannels]
- # shape of output_max: [OutChannels]
- # shape of target_weight: [OutChannels, InChannels]
- assert input_max.ndim == 1
- assert output_max.ndim == 1
-
- assert quant_dtype == ModelDataType.INT8
-
- in_channels = input_max.size(0)
- out_channels = output_max.size(0)
- assert tuple(weight.size()) == (out_channels, in_channels)
-
- max_val = 2 ** (8 - 1) - 1
- min_val = -(2 ** (8 - 1))
-
- act_scale = float(input_max.detach().abs().max().item()) / float(max_val)
- weight_scale = float(target_weight.detach().abs().max().item()) / float(max_val)
-
- q_weight = (
- (weight.detach().float() / weight_scale)
- .round()
- .clip(min_val, max_val)
- .to(get_torch_quant_dtype(8))
- .to(device)
- )
-
- return WeightActQuantResult(
- layer_name,
- quant_dtype=quant_dtype,
- zero_point=torch.tensor(0.0),
- act_scale=torch.tensor(act_scale),
- weight_scale=torch.tensor(weight_scale),
- q_weight=q_weight,
- q_group_size=-1,
- )
-
-
-def get_weight_only_quant_scales(
- w: torch.Tensor,
- q_bit: int,
- q_group_size: int,
- layer_name: str = "",
- device: Union[str, torch.device] = "cpu",
-) -> WeightOnlyQuantResult:
- """Return the quantization scales of weight for a specific layer."""
- assert q_bit in [4, 8]
- org_w_shape = w.shape # [OutDim, InDim]
-
- w = w.reshape(-1, q_group_size) # [OutDim x num_groups, group_size]
- max_val = w.amax(dim=1, keepdim=True)
- min_val = w.amin(dim=1, keepdim=True)
-
- max_int = 2**q_bit - 1
- min_int = 0
-
- scales = (max_val - min_val).clamp(min=1e-5) / max_int
- zeros = (-torch.round(min_val / scales)).clamp_(min_int, max_int)
-
- assert torch.isnan(scales).sum() == 0
-
- q_weight = torch.clamp(torch.round(w / scales) + zeros, min_int, max_int)
- q_weight = q_weight.reshape(org_w_shape).detach().to(device)
- scales = (
- scales.view(org_w_shape[0], -1).transpose(0, 1).detach().to(device)
- ) # [num_groups, OutDim]
- zeros = (
- zeros.view(org_w_shape[0], -1).transpose(0, 1).detach().to(device)
- ) # [num_groups, OutDim]
-
- assert torch.isnan(q_weight).sum() == 0
-
- return WeightOnlyQuantResult(
- layer_name,
- quant_dtype=ModelDataType.INT4 if q_bit == 4 else ModelDataType.INT8,
- zero_point=zeros,
- q_group_size=q_group_size,
- weight_scale=scales,
- q_weight=q_weight,
- )
-
-
-def send_model_to_device(
- model: torch.nn.Module,
- device: Union[str, torch.device],
- *,
- exclude: Iterable[torch.nn.Module] = (),
-):
- """Send the model and its submodules onto device except for modules designated by `exclude`."""
- exclude_set = set(exclude)
-
- @torch.no_grad()
- def recurse(m: torch.nn.Module):
- if m in exclude_set:
- return
- for name, p in list(m.named_parameters(recurse=False)):
- m.register_parameter(name, torch.nn.Parameter(p.to(device)))
- for name, b in list(m.named_buffers(recurse=False)):
- m.register_buffer(name, b.to(device))
-
- for child in m.children():
- recurse(child)
-
- recurse(model)
-
-
-class RemovableOffloaderHook(Protocol):
- """Hook protocol for cpu offloader."""
-
- def offload(self) -> None:
- """Offload the associated block onto CPU."""
-
- def remove(self) -> None:
- """Remove this hook."""
-
-
-@contextmanager
-def offload_module_sequence(
- blocks: Sequence[torch.nn.Module], device: Union[str, torch.device]
-):
- """Offload a sequence of torch modules automatically.
-
- In the beginning, all blocks are supposed to reside on CPU.
- When the i-th block is called, it is loaded onto `device` on the fly,
- and the (i-1)-th block is offloaded back to CPU at the same time.
- """
- module_hooks: List[RemovableOffloaderHook] = []
- if blocks:
- prev_module_hook = None
- for tf_block in blocks:
- _, module_hook = cpu_offload_with_hook(
- tf_block, device, prev_module_hook=prev_module_hook
- )
- prev_module_hook = module_hook
- module_hooks.append(module_hook)
- try:
- yield
- finally:
- for hook in module_hooks:
- hook.offload()
- for hook in module_hooks:
- hook.remove()
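
The removed `get_weight_only_quant_scales` computes group-wise asymmetric scales and zero points and then rounds the weight onto the integer grid. A standalone sketch of the same arithmetic on a toy weight tensor, following the formulas in the deleted function (it does not import the removed module):

```python
import torch

q_bit, q_group_size = 4, 8
w = torch.randn(16, 32)  # [OutDim, InDim], toy shape
org_shape = w.shape

groups = w.reshape(-1, q_group_size)           # [OutDim * num_groups, group_size]
max_val = groups.amax(dim=1, keepdim=True)
min_val = groups.amin(dim=1, keepdim=True)

max_int, min_int = 2**q_bit - 1, 0
scales = (max_val - min_val).clamp(min=1e-5) / max_int
zeros = (-torch.round(min_val / scales)).clamp(min_int, max_int)

q_weight = torch.clamp(torch.round(groups / scales) + zeros, min_int, max_int)
q_weight = q_weight.reshape(org_shape)

# Dequantize to sanity-check the round trip.
deq = ((q_weight.reshape(-1, q_group_size) - zeros) * scales).reshape(org_shape)
print("max round-trip error:", (deq - w).abs().max().item())
```
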
diff --git a/friendli/modules/quantizer_v2/__init__.py b/friendli/modules/quantizer_v2/__init__.py
deleted file mode 100644
index 9ee5a33d..00000000
--- a/friendli/modules/quantizer_v2/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer V2."""
diff --git a/friendli/modules/quantizer_v2/base.py b/friendli/modules/quantizer_v2/base.py
deleted file mode 100644
index 08c48f2d..00000000
--- a/friendli/modules/quantizer_v2/base.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantization Interface."""
-
-from __future__ import annotations
-
-import os
-from abc import ABC, abstractmethod
-from contextlib import contextmanager
-from typing import Any, Dict, Iterator, List, Tuple, Type
-
-import huggingface_hub # type: ignore
-import torch
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-from transformers import PretrainedConfig, PreTrainedModel # type: ignore
-
-from friendli.errors import NotSupportedQuantConfigError
-from friendli.logging import logger
-from friendli.modules.quantizer_v2.enums import QuantDatasetFormat
-from friendli.modules.quantizer_v2.layers import (
- WeightActQuantizedLinearLayer,
- WeightOnlyQuantizedLinearLayer,
-)
-from friendli.modules.quantizer_v2.schema.config import OneOfQuantConfig
-from friendli.modules.quantizer_v2.schema.data import TFQuantInputs
-from friendli.modules.quantizer_v2.utils import (
- collect_stats,
- get_weight_act_quant_scales,
- get_weight_only_quant_scales,
- offload_module_sequence,
- send_model_to_device,
-)
-
-
-class AbstractQuantHookV2(ABC):
- """Abstract Quantization Hook for a specific model."""
-
- def __init__(self, quant_config: OneOfQuantConfig, model_config: PretrainedConfig):
- """Initialize the Quantization Hook.
-
- Args:
- quant_config (OneOfQuantConfig): Quantization configuration.
- model_config (PretrainedConfig): Model configuration.
- """
- self.quant_config = quant_config
- self.model_config = model_config
-
- @abstractmethod
- def check_model_config(self) -> None:
- """Check if the model is quantizable."""
-
- @abstractmethod
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module], ...]:
- """Get linear layer types in the model."""
-
- @abstractmethod
- def get_tf_blocks(self, model: PreTrainedModel) -> List[torch.nn.Module]:
- """Get tensor fusion blocks in the model."""
-
- @abstractmethod
- def iter_tf_quant_inputs(self, model: PreTrainedModel) -> Iterator[TFQuantInputs]:
- """Iterate over TFQuantInputs."""
-
- @property
- @abstractmethod
- def quantized_layer_prefix(self) -> str:
- """Returns the prefix of the transformer block name."""
-
-
-class AbstractQuantizerV2(ABC):
- """Abstract class for quantizer."""
-
- def __init__(self, hook: AbstractQuantHookV2, config: OneOfQuantConfig):
- """Initialize AbstractQuantizer."""
- self.config = config
- self.hook = hook
-
- def check_config(self) -> None:
- """Check if the model is quantizable."""
- self.hook.check_model_config()
- calibration_dataset_config = self.config.calibration_dataset
- data_path_or_name = calibration_dataset_config.path_or_name
- percentile = self.config.percentile
- if percentile <= 0 or percentile > 100:
- raise NotSupportedQuantConfigError(
- invalid_option=str(percentile),
- valid_options=["0 < percentile <= 100"],
- )
- if not os.path.exists(data_path_or_name):
- data_name = data_path_or_name.split(":")[0]
- if data_name not in (
- data.id for data in huggingface_hub.list_datasets(search=data_name)
- ):
- raise NotSupportedQuantConfigError(
- invalid_option=data_name,
- valid_options=["datasets on the huggingface hub", "local path"],
- )
- else:
- if calibration_dataset_config.format not in QuantDatasetFormat:
- raise NotSupportedQuantConfigError(
- invalid_option=calibration_dataset_config.format,
- valid_options=list(QuantDatasetFormat),
- )
- try:
- torch.device(self.config.device)
- except ValueError as err:
- raise NotSupportedQuantConfigError(
- invalid_option=self.config.device,
- valid_options=["cpu", "cuda"],
- ) from err
-
- @contextmanager
- def _try_offload_model(self, model: PreTrainedModel):
- if not self.config.offload:
- logger.info("Offloading not enabled. Skipping.")
- model.to(self.config.device)
- yield
- else:
- logger.info("Offloading enabled.")
- tf_blocks = self.hook.get_tf_blocks(model)
- send_model_to_device(model, self.config.device, exclude=tf_blocks)
- with offload_module_sequence(tf_blocks, self.config.device):
- yield
-
- @abstractmethod
- def quantize(self, model: PreTrainedModel) -> PreTrainedModel:
- """Quantize model."""
-
- def pre_quantize(self, model: PreTrainedModel) -> PreTrainedModel:
- """Preprocess model before quantization."""
-
- def post_quantize(self, model: PreTrainedModel) -> PreTrainedModel:
- """Postprocess model after quantization."""
-
- @abstractmethod
- def get_quant_config(self) -> Dict[str, Any]:
- """Get quantizer config."""
-
-
-class AbstractWeightOnlyQuantizer(AbstractQuantizerV2):
- """Abstract class for weight only quantizer."""
-
- def quantize(self, model: PreTrainedModel) -> PreTrainedModel:
- """Return quantized model."""
- with self._try_offload_model(model):
- for tf_quant_inputs in tqdm(
- self.hook.iter_tf_quant_inputs(model),
- total=len(self.hook.get_tf_blocks(model)),
- desc="Quantize model..",
- ):
- for quant_input in tf_quant_inputs.quant_inputs:
- parent_module, local_names, names = (
- quant_input.parent_module,
- quant_input.local_names,
- quant_input.target_names,
- )
- parent_modules_w_local_name = []
- if isinstance(parent_module, torch.nn.ModuleList):
- # For MoE models with separate expert layers
- for p_module in parent_module:
- for local_name in local_names:
- parent_modules_w_local_name.append(
- (p_module, local_name)
- )
- else:
- assert isinstance(parent_module, torch.nn.Module)
- for local_name in local_names:
- parent_modules_w_local_name.append(
- (parent_module, local_name)
- )
- layers = [
- p_module.get_submodule(local_name)
- for p_module, local_name in parent_modules_w_local_name
- ]
- assert self.config.quant_scale_dtype
- quant_results = get_weight_only_quant_scales(
- model,
- names,
- quant_dtype=self.config.quant_dtype,
- quant_scale_dtype=self.config.quant_scale_dtype,
- q_group_size=self.config.quant_group_size,
- use_symmetric=self.config.use_symmetric,
- )
- q_layers = [
- WeightOnlyQuantizedLinearLayer.from_layer(layer, quant_result)
- for layer, quant_result in zip(layers, quant_results)
- ]
- for (p_module, local_name), q_layer in zip(
- parent_modules_w_local_name, q_layers
- ):
- setattr(p_module, local_name, q_layer)
- return model
-
-
-class AbstractWeightActQuantizer(AbstractQuantizerV2):
- """Abstract class for weight and activation quantizer."""
-
- @abstractmethod
- def get_calib_dataloader(self) -> DataLoader:
- """Get encoded calibration dataset."""
-
- def quantize(self, model: PreTrainedModel) -> PreTrainedModel:
- """Return quantized model."""
- with self._try_offload_model(model):
- max_input_stats, _ = collect_stats(
- model,
- self.config.device,
- self.get_calib_dataloader(),
- self.hook.get_linear_layer_types(),
- percentile=self.config.percentile,
- tqdm_desc="Collecting stats for Static Quantization.",
- )
- for tf_quant_inputs in tqdm(
- self.hook.iter_tf_quant_inputs(model),
- total=len(self.hook.get_tf_blocks(model)),
- desc="Quantize model..",
- ):
- for quant_input in tf_quant_inputs.quant_inputs:
- parent_module, local_names, names = (
- quant_input.parent_module,
- quant_input.local_names,
- quant_input.target_names,
- )
- parent_modules_w_local_name = []
- if isinstance(parent_module, torch.nn.ModuleList):
- # For MoE models with separate expert layers
- for p_module in parent_module:
- for local_name in local_names:
- parent_modules_w_local_name.append(
- (p_module, local_name)
- )
- else:
- assert isinstance(parent_module, torch.nn.Module)
- for local_name in local_names:
- parent_modules_w_local_name.append((parent_module, local_name))
- layers = [
- p_module.get_submodule(local_name)
- for p_module, local_name in parent_modules_w_local_name
- ]
- assert self.config.quant_scale_dtype
- quant_results = get_weight_act_quant_scales(
- model,
- names,
- max_input_stats,
- quant_scale_dtype=self.config.quant_scale_dtype,
- quant_dtype=self.config.quant_dtype,
- )
- q_layers = [
- WeightActQuantizedLinearLayer.from_layer(layer, quant_result)
- for layer, quant_result in zip(layers, quant_results)
- ]
- for (p_module, local_name), q_layer in zip(
- parent_modules_w_local_name, q_layers
- ):
- setattr(p_module, local_name, q_layer)
- return model
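
Both quantizer base classes above resolve each target linear layer through its parent module and a local attribute name, then swap in a quantized wrapper with `setattr`. A minimal sketch of that swap pattern on a toy module (class and attribute names are illustrative only, not the real Friendli layers):

```python
import torch


class ToyAttention(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.q_proj = torch.nn.Linear(8, 8, bias=False)


class QuantizedLinearStub(torch.nn.Module):
    """Stand-in for a quantized linear wrapper (placeholder cast, not real quantization)."""

    def __init__(self, linear: torch.nn.Linear):
        super().__init__()
        self.weight = torch.nn.Parameter(
            linear.weight.detach().to(torch.int8), requires_grad=False
        )


parent_module, local_name = ToyAttention(), "q_proj"

# Resolve the layer the same way the quantizers do, then replace it in place.
layer = parent_module.get_submodule(local_name)
setattr(parent_module, local_name, QuantizedLinearStub(layer))

assert isinstance(parent_module.q_proj, QuantizedLinearStub)
```
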
diff --git a/friendli/modules/quantizer_v2/enums.py b/friendli/modules/quantizer_v2/enums.py
deleted file mode 100644
index 18bc60c7..00000000
--- a/friendli/modules/quantizer_v2/enums.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer Enums."""
-
-
-from __future__ import annotations
-
-from enum import Enum
-
-
-class QuantMode(str, Enum):
- """Supported quantization modes."""
-
- INT8 = "int8"
- DUMMY = "dummy"
-
-
-class QuantDatasetFormat(str, Enum):
- """Supported file format for calibration datasets for quantization."""
-
- JSON = "json"
- CSV = "csv"
- PARQUET = "parquet"
- TXT = "txt"
-
-
-class Int8QuantType(str, Enum):
- """Int8Quant modes."""
-
- DYNAMIC = "dynamic"
-
-
-class ModelDataType(str, Enum):
- """Model dtype enums."""
-
- BF16 = "bf16"
- FP16 = "fp16"
- FP32 = "fp32"
- FP8_E4M3 = "fp8_e4m3"
- INT8 = "int8"
- INT4 = "int4"
diff --git a/friendli/modules/quantizer_v2/int8/__init__.py b/friendli/modules/quantizer_v2/int8/__init__.py
deleted file mode 100644
index 9f651b15..00000000
--- a/friendli/modules/quantizer_v2/int8/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Int8 Quantizer."""
diff --git a/friendli/modules/quantizer_v2/int8/base.py b/friendli/modules/quantizer_v2/int8/base.py
deleted file mode 100644
index 66e200a8..00000000
--- a/friendli/modules/quantizer_v2/int8/base.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Int8 Quantizer Base."""
-
-from __future__ import annotations
-
-from abc import abstractmethod
-from typing import Any, Dict, Iterator, List, Tuple, cast
-
-import torch
-from torch.utils.data import DataLoader
-from transformers import PreTrainedModel # type: ignore
-
-from friendli.modules.converter.utils import get_tokenizer
-from friendli.modules.quantizer_v2.base import (
- AbstractQuantHookV2,
- AbstractQuantizerV2,
- AbstractWeightActQuantizer,
- AbstractWeightOnlyQuantizer,
-)
-from friendli.modules.quantizer_v2.int8.utils import perform_smoothing
-from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig
-from friendli.modules.quantizer_v2.schema.data import ModuleName
-from friendli.modules.quantizer_v2.utils import collect_stats, safe_load_datasets
-
-
-class Int8QuantHook(AbstractQuantHookV2):
- """Int8 Quant Hook Base."""
-
- @abstractmethod
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the attention fc layer in the decoder block."""
-
- @abstractmethod
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Returns the second feed-forward layer in the decoder block."""
-
- @abstractmethod
- def iter_pre_act_post_act_params(
- self, model: PreTrainedModel
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Returns iterator of pre_act_params and post_act_params per transformer block."""
-
-
-class Int8Quantizer(AbstractQuantizerV2):
- """Int8 Quantizer Base."""
-
- def get_smoothing_calib_dataloader(self) -> DataLoader:
- """Get calibration dataset for Int8."""
- data_cfg = self.config.calibration_dataset
- dataset = safe_load_datasets(data_cfg)
- tokenizer = get_tokenizer(self.hook.model_config.name_or_path)
- dataset = (
- dataset.shuffle(self.config.seed)
- .select(range(data_cfg.num_samples))
- .select_columns([data_cfg.lookup_column_name])
- )
- encoded_dataset = tokenizer(
- dataset[data_cfg.lookup_column_name],
- return_tensors="pt",
- truncation=True,
- padding=True,
- max_length=data_cfg.max_length,
- )
- return DataLoader(encoded_dataset["input_ids"], batch_size=data_cfg.batch_size)
-
- def _smooth(
- self,
- model: PreTrainedModel,
- ) -> None:
- """Smooths the models before Quantization."""
- model.eval()
- # collect stats for Int8 quantization scale.
- with self._try_offload_model(model):
- calib_dataloader = self.get_smoothing_calib_dataloader()
- quant_config = cast(Int8QuantConfig, self.config)
- max_input_stats, _ = collect_stats(
- model,
- quant_config.device,
- calib_dataloader,
- self.hook.get_linear_layer_types(),
- tqdm_desc="Collecting stats for Smoothing.",
- percentile=100.0,
- )
-
- for pre_act_params, post_act_params, name in cast(
- Int8QuantHook, self.hook
- ).iter_pre_act_post_act_params(model):
- perform_smoothing(
- pre_act_params,
- post_act_params,
- max_input_stats[name],
- migration_strength=quant_config.int8_args.migration_strength,
- inplace=True,
- )
-
- def pre_quantize(
- self,
- model: PreTrainedModel,
- ) -> None:
- """Pre-procedure that should be called before quantize() is called."""
- self._smooth(model)
-
- def quantize(self, model: PreTrainedModel) -> torch.nn.Module:
- """Quantize the model."""
- self.pre_quantize(model)
- return super().quantize(model)
-
- def get_quant_config(self) -> Dict[str, Any]:
- """Get the quantization configuration."""
- return {
- "bits": 8,
- "mode": cast(Int8QuantConfig, self.config).int8_args.quant_type.value,
- "zero_point": False,
- "quant_method": "int8",
- "quant_group_size": self.config.quant_group_size,
- }
-
-
-class Int8StaticQuantizer(Int8Quantizer, AbstractWeightActQuantizer):
- """Int8 Dynamic Quantizer Base."""
-
-
-class Int8DynamicQuantizer(Int8Quantizer, AbstractWeightOnlyQuantizer):
- """Int8 Dynamic Quantizer Base."""
diff --git a/friendli/modules/quantizer_v2/int8/utils.py b/friendli/modules/quantizer_v2/int8/utils.py
deleted file mode 100644
index c482f87d..00000000
--- a/friendli/modules/quantizer_v2/int8/utils.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Int8 Quantizer Base."""
-
-from __future__ import annotations
-
-from typing import List, Tuple
-
-import torch
-
-
-@torch.no_grad()
-def perform_smoothing(
- pre_act_params: List[torch.Tensor],
- post_act_params: List[torch.Tensor],
- activation_max: torch.Tensor,
- *,
- migration_strength: float = 0.5,
- epsilon: float = 1e-5,
- inplace: bool = False,
-) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
- """Perform activation-weight smoothing in SmoothQuant.
-
- Performs the activation-weight smoothing scheme described in SmoothQuant
- (Xiao et al., 2023), which migrates the amplitude of outliers from activations
- to weights of matmul layers. The function takes in the following parameters:
-
- Args:
- pre_act_params: torch.Tensors representing affine parameters
- before each matmul layer.
- post_act_params: torch.Tensors representing the weight matrices of the matmul layer.
- activation_max: The maximum activation value of inputs of the matmul layer.
- migration_strength: the strength of the activation migration. Default is 0.5.
- epsilon: The epsilon used for numerical stability when calculating the scales.
- Default is 1e-5.
-
- Returns:
- A tuple of two lists of torch.Tensors: (smoothed_pre_act_params, smoothed_post_act_params)
-
- The function calculates "scales" as `pow(|Activation|, migration_strength) /
- pow(|Weight|, 1-migration_strength)` and applies the smoothing effect into
- a normalization layer that exists before every matmul layer. This is done because
- it is more efficient than introducing a new smoothing layer before every matmul layer.
- Fusing the smoothing effect into the normalization layer results in a faster and
- more efficient implementation of the smoothing scheme.
-
- The function returns the smoothed normalization coefficients and the smoothed weight
- matrices after the smoothing process.
- """
- # shape of activation norms: [InChannels]
- # shape of fc weights: [OutChannels, InChannels]
- # shape of activation_max: [InChannels]
-
- # pylint: disable=too-many-locals
- assert pre_act_params
- assert post_act_params
-
- in_channels = pre_act_params[0].size(0)
- device = pre_act_params[0].device
- dtype = pre_act_params[0].dtype
-
- for pre_act_param in pre_act_params:
- assert pre_act_param.device == device
- assert pre_act_param.dtype == dtype
-
- for weight in post_act_params:
- assert weight.ndim == 2
- assert weight.size(1) == in_channels, (weight.size(), in_channels)
- assert weight.device == device
-
- activation_max = activation_max.to(device=device)
- weight_max = post_act_params[0].abs().max(dim=0).values
- for weight in post_act_params[1:]:
- weight_max = torch.maximum(weight_max, weight.abs().max(dim=0).values)
-
- assert tuple(activation_max.size()) == (in_channels,)
- assert tuple(weight_max.size()) == (in_channels,)
- alpha = migration_strength
- scales = (
- (
- activation_max.to(dtype=torch.float32).pow(alpha)
- / weight_max.to(dtype=torch.float32).pow(1 - alpha)
- )
- .clamp(min=epsilon)
- .to(dtype=dtype)
- )
-
- scaled_pre_act_params = [act_norm / scales for act_norm in pre_act_params]
- scaled_weights = [w * scales.view(1, -1) for w in post_act_params]
-
- if inplace:
- for dst, src in zip(pre_act_params, scaled_pre_act_params):
- dst.copy_(src)
- for dst, src in zip(post_act_params, scaled_weights):
- dst.copy_(src)
-
- return scaled_pre_act_params, scaled_weights
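
The removed `perform_smoothing` follows the SmoothQuant recipe: per-channel scales are computed as `|X|_max^alpha / |W|_max^(1-alpha)` and folded into the preceding norm, so activations shrink while weights absorb the factor. A toy numerical sketch of that formula (shapes and values are illustrative):

```python
import torch

alpha, epsilon = 0.5, 1e-5
in_channels = 4

layernorm_w = torch.ones(in_channels)                 # pre-activation affine parameter
fc_w = torch.randn(8, in_channels)                    # matmul weight, [OutChannels, InChannels]
activation_max = torch.tensor([10.0, 0.5, 2.0, 1.0])  # per-channel activation maxima

weight_max = fc_w.abs().max(dim=0).values
scales = (activation_max.pow(alpha) / weight_max.pow(1 - alpha)).clamp(min=epsilon)

smoothed_norm = layernorm_w / scales       # activations shrink by `scales`
smoothed_fc_w = fc_w * scales.view(1, -1)  # the weights absorb the same factor

# The migrated factors cancel, so the matmul output is unchanged before quantization.
print(scales)
```
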
diff --git a/friendli/modules/quantizer_v2/layers.py b/friendli/modules/quantizer_v2/layers.py
deleted file mode 100644
index 3a203210..00000000
--- a/friendli/modules/quantizer_v2/layers.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Quantization Layers."""
-
-from __future__ import annotations
-
-from typing import Optional, cast
-
-import torch
-
-from friendli.modules.quantizer_v2.schema.data import (
- WeightActQuantResult,
- WeightOnlyQuantResult,
-)
-
-
-class WeightOnlyQuantizedLinearLayer(torch.nn.Module):
- """Linear Layer with weight only quantization."""
-
- def __init__(
- self,
- in_features: int,
- out_features: int,
- q_weight: torch.Tensor,
- weight_scale: torch.Tensor,
- zeros: Optional[torch.nn.Parameter] = None,
- bias: Optional[torch.nn.Parameter] = None,
- ):
- """Initialize the Weight Only Quantized Linear Layer."""
- super().__init__()
- self.in_features = in_features
- self.out_features = out_features
- self.weight_scale = torch.nn.Parameter(weight_scale)
- self.weight = torch.nn.Parameter(q_weight, requires_grad=False)
- self.register_parameter("zeros", zeros)
- self.register_parameter("bias", bias)
-
- @staticmethod
- def from_layer(
- layer: torch.nn.Module, quant_result: WeightOnlyQuantResult
- ) -> torch.nn.Module:
- """Returns the quantized layer from the original layer."""
- zeros = (
- torch.nn.Parameter(quant_result.zero_point)
- if quant_result.zero_point is not None
- else None
- )
- return WeightOnlyQuantizedLinearLayer(
- cast(torch.nn.Linear, layer).in_features,
- cast(torch.nn.Linear, layer).out_features,
- quant_result.q_weight,
- quant_result.weight_scale,
- zeros,
- cast(torch.nn.Linear, layer).bias,
- )
-
- def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Forward pass with fake quantization. Not used in conversion."""
- raise NotImplementedError("Not used in conversion.")
-
-
-class WeightActQuantizedLinearLayer(torch.nn.Module):
- """Linear Layer with weight-act quantization."""
-
- def __init__( # pylint: disable=too-many-arguments
- self,
- q_weight: torch.Tensor,
- weight_scale: torch.Tensor,
- act_scale: torch.Tensor,
- bias: Optional[torch.nn.Parameter] = None,
- ):
- """Initialize the Weight Only Quantized Linear Layer."""
- super().__init__()
- self.in_scale = torch.nn.Parameter(act_scale)
- self.weight_scale = torch.nn.Parameter(weight_scale)
- self.weight = torch.nn.Parameter(q_weight, requires_grad=False)
- self.register_parameter("bias", bias)
-
- @staticmethod
- def from_layer(
- layer: torch.nn.Module, quant_result: WeightActQuantResult
- ) -> torch.nn.Module:
- """Returns the quantized layer from the original layer."""
- q_result = cast(WeightActQuantResult, quant_result)
- return WeightActQuantizedLinearLayer(
- q_result.q_weight,
- q_result.weight_scale,
- q_result.act_scale,
- cast(torch.nn.Linear, layer).bias if hasattr(layer, "bias") else None,
- )
-
- def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Forward pass with fake quantization. Not used in conversion."""
- raise NotImplementedError("Not used in conversion.")
diff --git a/friendli/modules/quantizer_v2/maps.py b/friendli/modules/quantizer_v2/maps.py
deleted file mode 100644
index 48e972eb..00000000
--- a/friendli/modules/quantizer_v2/maps.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Quantizer V2 Maps."""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, Tuple, Type, cast
-
-import transformers # type: ignore
-from transformers import ( # type: ignore
- LlamaForCausalLM,
- MistralForCausalLM,
- Phi3ForCausalLM,
- PretrainedConfig,
- PreTrainedModel,
-)
-
-from friendli.errors import NotSupportedQuantModeError, QuantizationError
-from friendli.modules.quantizer_v2.base import AbstractQuantizerV2
-from friendli.modules.quantizer_v2.enums import Int8QuantType, QuantMode
-from friendli.modules.quantizer_v2.int8.base import Int8DynamicQuantizer, Int8QuantHook
-from friendli.modules.quantizer_v2.models.llama import LlamaInt8QuantHook
-from friendli.modules.quantizer_v2.models.phi3 import Phi3Int8QuantHook
-from friendli.modules.quantizer_v2.schema.config import (
- Int8QuantConfig,
- OneOfQuantConfig,
-)
-
-model_arch_int8_hook_map: Dict[PreTrainedModel, type[Int8QuantHook]] = {
- LlamaForCausalLM: LlamaInt8QuantHook,
- MistralForCausalLM: LlamaInt8QuantHook,
- Phi3ForCausalLM: Phi3Int8QuantHook,
-}
-
-
-def get_quanthook_map(quant_mode: QuantMode) -> Dict[Type[PreTrainedModel], Any]:
- """Get quantizer map."""
- if quant_mode == QuantMode.INT8:
- return model_arch_int8_hook_map
- raise NotSupportedQuantModeError(
- invalid_option=quant_mode,
- valid_options=[e.value for e in QuantMode],
- )
-
-
-def get_model_class(config: PretrainedConfig) -> PreTrainedModel:
- """Get HuggingFace model architecture from config."""
- model_arch_list = cast(List[str], cast(PretrainedConfig, config).architectures)
- if len(model_arch_list) == 0:
- raise QuantizationError("Model architecture not found in config.")
- model_arch = model_arch_list[0]
- try:
- cls_type = getattr(transformers, model_arch, None)
- except AttributeError as exc:
- raise QuantizationError(str(exc)) from exc
- return cls_type
-
-
-def get_quantizer_class(quant_config: OneOfQuantConfig) -> Type[AbstractQuantizerV2]:
- """Get quantizer class."""
- quant_mode = quant_config.mode
- if quant_mode == QuantMode.INT8:
- if (
- cast(Int8QuantConfig, quant_config).int8_args.quant_type
- == Int8QuantType.DYNAMIC
- ):
- return Int8DynamicQuantizer
- raise QuantizationError(
- "Only Dynamic quantization is supported for int8 quantization."
- )
- raise NotSupportedQuantModeError(
- invalid_option=quant_mode,
- valid_options=[e.value for e in QuantMode],
- )
-
-
-def get_hf_quantizer_factory(
- model_config: PretrainedConfig,
- quant_config: OneOfQuantConfig,
-) -> Tuple[PreTrainedModel, AbstractQuantizerV2]:
- """Get quantizer for specific model architecture with quant mode and args."""
- hf_model_cls = get_model_class(model_config)
- quantizer = get_quantizer_class(quant_config)
- quanthook_map = get_quanthook_map(quant_config.mode)
- quanthook = quanthook_map[hf_model_cls](quant_config, model_config)
- return hf_model_cls, quantizer(quanthook, quant_config)
diff --git a/friendli/modules/quantizer_v2/models/llama.py b/friendli/modules/quantizer_v2/models/llama.py
deleted file mode 100644
index 649d8471..00000000
--- a/friendli/modules/quantizer_v2/models/llama.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli LlamaForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedModel
-
-from friendli.errors import NotSupportedCheckpointError, QuantizationError
-from friendli.modules.quantizer_v2.base import AbstractQuantHookV2
-from friendli.modules.quantizer_v2.int8.base import Int8QuantHook
-from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig
-from friendli.modules.quantizer_v2.schema.data import (
- ModuleName,
- QuantInput,
- TFQuantInputs,
-)
-
-
-class LlamaQuantHook(AbstractQuantHookV2):
- """BaseQuantHook for LlamaForCausalLM."""
-
- def check_model_config(self) -> None:
- """Check if LLaMA architectures' config can be converted to Friendli format."""
- try:
- if cast(LlamaConfig, self.model_config).hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(LlamaConfig, self.model_config).hidden_act}'",
- valid_options=["silu"],
- )
- if cast(LlamaConfig, self.model_config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if cast(LlamaConfig, self.model_config).rms_norm_eps not in (1e-5, 1e-6):
- raise NotSupportedCheckpointError(
- invalid_option=f"'rms_norm_eps={cast(LlamaConfig, self.model_config).rms_norm_eps}'",
- valid_options=[1e-5, 1e-6],
- )
- except AttributeError as exc:
- raise QuantizationError(str(exc)) from exc
-
- def get_tf_blocks(self, model: PreTrainedModel) -> List[torch.nn.Module]:
- """Return the transformer blocks in LlamaForCausalLM."""
- return model.model.layers
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Return the linear layer types in LlamaForCausalLM."""
- return (torch.nn.Linear,)
-
- @property
- def quantized_layer_prefix(self) -> str:
- """The layer name prefix used before LLaMA's transformer block number."""
- return "model.layers."
-
-
-class LlamaInt8QuantHook(LlamaQuantHook, Int8QuantHook):
- """Int8QuantHook for LlamaForCausalLM."""
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Return the linear layer after attention in the decoder layer."""
- return decoder_layer.self_attn.o_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Return the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.down_proj
-
- def iter_pre_act_post_act_params(
- self,
- model: LlamaForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Return iterator of layernorm's weight and linear layer's weight per transformer block in LlamaForCausalLM."""
-
- for index, decoder_layer in enumerate(model.model.layers): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.input_layernorm.weight.data,
- ],
- [
- decoder_layer.self_attn.q_proj.weight.data,
- decoder_layer.self_attn.k_proj.weight.data,
- decoder_layer.self_attn.v_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj", # the input tensors fed into Q, K, V matrices are identical.
- )
- # [LayerNorm 2] - [ MLP FF 1, MLP FF GATE ] gets smoothed
- yield (
- [
- decoder_layer.post_attention_layernorm.weight.data,
- ],
- [
- decoder_layer.mlp.up_proj.weight.data,
- decoder_layer.mlp.gate_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.mlp.up_proj",
- )
-
- def iter_tf_quant_inputs(self, model: PreTrainedModel) -> Iterator[TFQuantInputs]:
- """Return the layers which should be quantized in transformer block of LlamaForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.self_attn
- mlp = decoder_layer.mlp
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- QuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.q_proj",
- ],
- local_names=["q_proj"],
- ),
- QuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.k_proj",
- ],
- local_names=["k_proj"],
- ),
- QuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.v_proj",
- ],
- local_names=["v_proj"],
- ),
- QuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- ],
- local_names=[
- "o_proj",
- ],
- ),
- QuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.up_proj",
- ],
- local_names=["up_proj"],
- ),
- QuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.gate_proj",
- ],
- local_names=["gate_proj"],
- ),
- QuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.down_proj"
- ],
- local_names=["down_proj"],
- ),
- ],
- )
diff --git a/friendli/modules/quantizer_v2/models/phi3.py b/friendli/modules/quantizer_v2/models/phi3.py
deleted file mode 100644
index 0fdc095f..00000000
--- a/friendli/modules/quantizer_v2/models/phi3.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Phi3ForCausalLM QuantizerHook."""
-
-# mypy: ignore-errors
-
-from __future__ import annotations
-
-from typing import Iterator, List, Tuple, Type, cast
-
-import torch
-from transformers import Phi3Config, Phi3ForCausalLM, PreTrainedModel
-
-from friendli.errors import NotSupportedCheckpointError, QuantizationError
-from friendli.modules.quantizer_v2.base import AbstractQuantHookV2
-from friendli.modules.quantizer_v2.int8.base import Int8QuantHook
-from friendli.modules.quantizer_v2.schema.data import (
- ModuleName,
- QuantInput,
- TFQuantInputs,
-)
-
-
-class Phi3QuantHook(AbstractQuantHookV2):
- """BaseQuantHook for Phi3ForCausalLM."""
-
- def check_model_config(self) -> None:
- """Check if Phi3 architectures' config can be converted to Friendli format."""
- try:
- if cast(Phi3Config, self.model_config).hidden_act not in ["silu"]:
- raise NotSupportedCheckpointError(
- invalid_option=f"'hidden_act={cast(Phi3Config, self.model_config).hidden_act}'",
- valid_options=["silu"],
- )
- if cast(Phi3Config, self.model_config).tie_word_embeddings:
- raise NotSupportedCheckpointError(
- invalid_option="'tie_word_embeddings=True'",
- valid_options=[False],
- )
- if cast(Phi3Config, self.model_config).rms_norm_eps not in (1e-5, 1e-6):
- raise NotSupportedCheckpointError(
- invalid_option=f"'rms_norm_eps={cast(Phi3Config, self.model_config).rms_norm_eps}'",
- valid_options=[1e-5, 1e-6],
- )
- except AttributeError as exc:
- raise QuantizationError(str(exc)) from exc
-
- def get_tf_blocks(self, model: PreTrainedModel) -> List[torch.nn.Module]:
- """Return the transformer blocks in Phi3ForCausalLM."""
- return model.model.layers
-
- def get_linear_layer_types(self) -> Tuple[Type[torch.nn.Module]]:
- """Return the linear layer types in Phi3ForCausalLM."""
- return (torch.nn.Linear,)
-
- @property
- def quantized_layer_prefix(self) -> str:
- """The layer name prefix used before Phi3's transformer block number."""
- return "model.layers."
-
-
-class Phi3Int8QuantHook(Phi3QuantHook, Int8QuantHook):
- """Int8QuantHook for Phi3ForCausalLM."""
-
- def get_attn_fc_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Return the linear layer after attention in the decoder layer."""
- return decoder_layer.self_attn.o_proj
-
- def get_ff2_layer(self, decoder_layer: torch.nn.Module) -> torch.nn.Linear:
- """Return the linear layer after FF1 in the decoder layer."""
- return decoder_layer.mlp.down_proj
-
- def iter_pre_act_post_act_params(
- self,
- model: Phi3ForCausalLM,
- ) -> Iterator[Tuple[List[torch.Tensor], List[torch.Tensor], ModuleName]]:
- """Return iterator of layernorm's weight and linear layer's weight per transformer block in Phi3ForCausalLM."""
-
- for index, decoder_layer in enumerate(model.model.layers): # type: ignore[union-attr]
- # [LayerNorm 1] - [ QKV projection ] gets smoothed
- yield (
- [
- decoder_layer.input_layernorm.weight.data,
- ],
- [
- decoder_layer.self_attn.qkv_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.self_attn.qkv_proj",
- )
- # [LayerNorm 2] - [ MLP FF 1, MLP FF GATE ] gets smoothed
- yield (
- [
- decoder_layer.post_attention_layernorm.weight.data,
- ],
- [
- decoder_layer.mlp.gate_up_proj.weight.data,
- ],
- f"{self.quantized_layer_prefix}{index}.mlp.gate_up_proj",
- )
-
- def iter_tf_quant_inputs(self, model: PreTrainedModel) -> Iterator[TFQuantInputs]:
- """Return the layers which should be quantized in transformer block of Phi3ForCausalLM."""
- for index, decoder_layer in enumerate(
- self.get_tf_blocks(model) # type: ignore[union-attr, arg-type]
- ):
- self_attn = decoder_layer.self_attn
- mlp = decoder_layer.mlp
-
- yield TFQuantInputs(
- layer_index=index,
- block=decoder_layer,
- quant_inputs=[
- QuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.qkv_proj",
- ],
- local_names=["qkv_proj"],
- ),
- QuantInput(
- parent_module=self_attn,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.self_attn.o_proj",
- ],
- local_names=[
- "o_proj",
- ],
- ),
- QuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.gate_up_proj",
- ],
- local_names=["gate_up_proj"],
- ),
- QuantInput(
- parent_module=mlp,
- target_names=[
- f"{self.quantized_layer_prefix}{index}.mlp.down_proj"
- ],
- local_names=["down_proj"],
- ),
- ],
- )
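
The pre-act/post-act pairs yielded above drive an activation-smoothing pass: the layernorm weight feeding a linear layer is scaled down while the linear weight is scaled up, so activation outliers migrate into the weights before INT8 quantization. A minimal sketch of that idea, outside this patch and with stand-in tensors; `migration_strength` mirrors the field of the same name in the INT8 quantization config, and the shapes are illustrative only:

    import torch

    def smooth_pair(ln_weight, fc_weight, act_max, migration_strength=0.5):
        # Per-input-channel smoothing scale, SmoothQuant-style.
        w_max = fc_weight.abs().amax(dim=0).clamp(min=1e-5)
        scale = (
            act_max.pow(migration_strength) / w_max.pow(1 - migration_strength)
        ).clamp(min=1e-5)
        ln_weight.div_(scale)   # pre-activation side absorbs 1/scale
        fc_weight.mul_(scale)   # post-activation side absorbs scale
        return scale

    hidden = 8
    ln_w = torch.ones(hidden)
    fc_w = torch.randn(3 * hidden, hidden)   # e.g. a fused qkv projection
    act_max = torch.rand(hidden) * 10        # collected activation maxima
    smooth_pair(ln_w, fc_w, act_max)
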
diff --git a/friendli/modules/quantizer_v2/quantize.py b/friendli/modules/quantizer_v2/quantize.py
deleted file mode 100644
index 8187db5f..00000000
--- a/friendli/modules/quantizer_v2/quantize.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Converter."""
-
-from __future__ import annotations
-
-from typing import Optional
-
-from friendli.errors import TokenizerNotFoundError
-from friendli.logging import logger
-from friendli.modules.quantizer_v2.maps import get_hf_quantizer_factory
-from friendli.modules.quantizer_v2.schema.config import OneOfQuantConfig
-from friendli.modules.quantizer_v2.utils import (
- get_model_dtype,
- get_model_pretrained_config,
- save_tokenizer,
-)
-
-
-def quantize_checkpoint(
- model_name_or_path: str,
- output_dir: str,
- quant_config: OneOfQuantConfig,
- *,
- cache_dir: Optional[str] = None,
- dry_run: bool = False,
-) -> None:
- """Quantize HuggingFace model checkpoint to Friendli format.
-
- Args:
- model_name_or_path (str): Hugging Face model name or local path to the checkpoint.
- output_dir (str): Directory path to save the converted checkpoint, the attribute YAML,
- and the tokenizer configuration file.
- quant_config (OneOfQuantConfig): Quantization configuration.
- cache_dir (Optional[str], optional): Path for downloading checkpoint. Defaults to None.
- dry_run (bool, optional): Only check whether the checkpoint is convertible. Defaults to False.
-
- Raises:
- InvalidConfigError: Raised when data_type is not supported.
- NotFoundError: Raised when `model_name_or_path` or `tokenizer_output_dir` is not found.
- NotSupportedCheckpointError: Raised when the model architecture is not supported for quantization.
- """
- model_config = get_model_pretrained_config(
- model_name_or_path, output_dir, cache_dir
- )
- if quant_config.quant_scale_dtype is None:
- model_dtype = get_model_dtype(model_config.torch_dtype)
- quant_config.quant_scale_dtype = model_dtype
- logger.warn(
- "quant_scale_dtype is not set. Set to %s, same as hf model dtype.",
- model_dtype,
- )
- hf_factory, quantizer = get_hf_quantizer_factory(model_config, quant_config)
- dtype = model_config.torch_dtype
- quantizer.check_config()
-
- if not dry_run:
- logger.info(
- "Start loading Hugging Face checkpoint(%s) for conversion...",
- model_name_or_path,
- )
- model = hf_factory.from_pretrained(
- model_name_or_path,
- torch_dtype=dtype,
- cache_dir=cache_dir,
- trust_remote_code=True,
- low_cpu_mem_usage=True,
- # `low_cpu_mem_usage` is for model loading faster and using ~1x model size CPU memory.
- # https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel.from_pretrained.example
- )
- logger.info(
- "Hugging Face checkpoint(%s) is successfully loaded!",
- model_name_or_path,
- )
- model = quantizer.quantize(model)
- model.config.update({"quantization_config": quantizer.get_quant_config()})
- model.save_pretrained(output_dir)
- try:
- save_tokenizer(
- model_name_or_path=model_name_or_path,
- cache_dir=cache_dir,
- save_dir=output_dir,
- )
- except TokenizerNotFoundError as exc:
- logger.warn(str(exc))
- logger.info(
- "Hugging Face checkpoint (%s) is successfully quantized to Friendli format!",
- model_name_or_path,
- )
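
For reference, a hedged usage sketch of the entry point removed above, as the module stood before this change; the model name and output directory are placeholders:

    from friendli.modules.quantizer_v2.quantize import quantize_checkpoint
    from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig

    quantize_checkpoint(
        model_name_or_path="microsoft/Phi-3-mini-4k-instruct",  # placeholder
        output_dir="./phi3-int8",                                # placeholder
        quant_config=Int8QuantConfig(),                          # INT8 defaults
        dry_run=True,                                            # config check only, no weights loaded
    )
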
diff --git a/friendli/modules/quantizer_v2/schema/__init__.py b/friendli/modules/quantizer_v2/schema/__init__.py
deleted file mode 100644
index f5d8dd04..00000000
--- a/friendli/modules/quantizer_v2/schema/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer Schema."""
diff --git a/friendli/modules/quantizer_v2/schema/config.py b/friendli/modules/quantizer_v2/schema/config.py
deleted file mode 100644
index 37b481c2..00000000
--- a/friendli/modules/quantizer_v2/schema/config.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer Config Schema."""
-
-from __future__ import annotations
-
-from typing import Literal, Optional, Union
-
-from pydantic import BaseModel, Field
-from typing_extensions import Annotated
-
-from friendli.modules.quantizer_v2.enums import (
- Int8QuantType,
- ModelDataType,
- QuantDatasetFormat,
- QuantMode,
-)
-
-
-class CalibrationDatasetConfig(BaseModel):
- """Calibration dataset config."""
-
- path_or_name: str = "cnn_dailymail:3.0.0"
- format: QuantDatasetFormat = QuantDatasetFormat.JSON
- split: str = "validation"
- lookup_column_name: str = "article"
- num_samples: int = 512
- max_length: int = 512
- batch_size: int = 1
-
-
-class AbstractQuantConfig(BaseModel):
- """Abstract quantization config."""
-
- mode: QuantMode
- device: str = "cuda:0"
- offload: bool = True
- seed: int = 42
- percentile: float = 100.0
- quant_dtype: ModelDataType = ModelDataType.INT8
- quant_scale_dtype: Optional[ModelDataType] = None
- use_symmetric: bool = True
- quant_group_size: int = -1 # no grouping
- calibration_dataset: CalibrationDatasetConfig = Field(
- default_factory=CalibrationDatasetConfig
- )
-
-
-class Int8QuantArtgs(BaseModel):
- """Int8Quant args."""
-
- migration_strength: float = 0.5
- quant_type: Int8QuantType = Int8QuantType.DYNAMIC
-
-
-class Int8QuantConfig(AbstractQuantConfig):
- """Int8Quant config."""
-
- mode: Literal[QuantMode.INT8] = QuantMode.INT8
- int8_args: Int8QuantArtgs = Field(default_factory=Int8QuantArtgs)
-
-
-class DummyQuantConfig(AbstractQuantConfig):
- """Dummy quant config."""
-
- mode: Literal[QuantMode.DUMMY] = QuantMode.DUMMY
-
-
-OneOfQuantConfig = Annotated[
- Union[Int8QuantConfig, DummyQuantConfig], Field(discriminator="mode")
-]
-
-
-class QuantConfig(BaseModel):
- """Quantization config."""
-
- config: OneOfQuantConfig
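
Because `OneOfQuantConfig` is a discriminated union on `mode`, a raw dict (for example loaded from a YAML file) resolves to the matching config class during validation. A small sketch under that assumption, with example values only:

    from friendli.modules.quantizer_v2.enums import QuantMode
    from friendli.modules.quantizer_v2.schema.config import QuantConfig

    raw = {
        "config": {
            "mode": QuantMode.INT8,                      # discriminator picks Int8QuantConfig
            "quant_group_size": -1,                      # no grouping
            "calibration_dataset": {"num_samples": 128},
        }
    }
    cfg = QuantConfig(**raw)
    print(type(cfg.config).__name__)                     # -> Int8QuantConfig
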
diff --git a/friendli/modules/quantizer_v2/schema/data.py b/friendli/modules/quantizer_v2/schema/data.py
deleted file mode 100644
index a5d8e29d..00000000
--- a/friendli/modules/quantizer_v2/schema/data.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Model Quantizer Data Schema."""
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import List, Optional
-
-import torch
-
-ModuleName = str
-
-
-@dataclass
-class BaseQuantResult:
- """Dataclass for quantization result per layer."""
-
- q_group_size: int
- zero_point: Optional[torch.Tensor]
- q_weight: torch.Tensor
- weight_scale: torch.Tensor
-
-
-@dataclass
-class WeightOnlyQuantResult(BaseQuantResult):
- """Dataclass for weight-only quantization result per layer."""
-
-
-@dataclass
-class WeightActQuantResult(BaseQuantResult):
- """Dataclass for weight-activation quantization result per layer."""
-
- act_scale: torch.Tensor
- q_group_size: int
-
-
-@dataclass
-class QuantInput:
- """Dataclass for quantization input of each layer in transformer block.
-
- When you want to quantize specific layers at once, the target layers should be
- included in this dataclass. For example, if the quantization scale of the q_proj,
- k_proj, and v_proj layers in the self-attention layer are calculated together,
- the target_names and local_names of these layers should be included in the
- same QuantInput dataclass.
-
- Attributes:
- parent_module: module contains target layers.
- target_names: list of target module's full name
- (ex. model.model.layers.0.self_attn.q_proj, )
- local_names: list of target module's name using when access from parent_module
- (ex. q_proj, k_proj, v_proj )
- """
-
- parent_module: torch.nn.Module
- target_names: List[ModuleName]
- local_names: List[ModuleName]
-
-
-@dataclass
-class TFQuantInputs:
- """Dataclass for Container of per transformer block."""
-
- layer_index: int
- block: torch.nn.Module
- quant_inputs: List[QuantInput]
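
As the `QuantInput` docstring notes, layers whose scales must be computed together are grouped into one entry. A self-contained sketch with a dummy attention module standing in for a real decoder layer (the module and the layer names here are illustrative, not Phi3's):

    import torch

    class DummyAttn(torch.nn.Module):
        def __init__(self, hidden: int = 16):
            super().__init__()
            self.q_proj = torch.nn.Linear(hidden, hidden)
            self.k_proj = torch.nn.Linear(hidden, hidden)
            self.v_proj = torch.nn.Linear(hidden, hidden)

    attn = DummyAttn()
    qkv_input = QuantInput(
        parent_module=attn,
        target_names=[
            "model.layers.0.self_attn.q_proj",
            "model.layers.0.self_attn.k_proj",
            "model.layers.0.self_attn.v_proj",
        ],
        local_names=["q_proj", "k_proj", "v_proj"],
    )
    block_inputs = TFQuantInputs(layer_index=0, block=attn, quant_inputs=[qkv_input])
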
diff --git a/friendli/modules/quantizer_v2/utils.py b/friendli/modules/quantizer_v2/utils.py
deleted file mode 100644
index 368ba95b..00000000
--- a/friendli/modules/quantizer_v2/utils.py
+++ /dev/null
@@ -1,565 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Friendli Quantizer Utils."""
-
-from __future__ import annotations
-
-import os
-from contextlib import contextmanager
-from pathlib import Path
-from typing import (
- Callable,
- Dict,
- Iterable,
- List,
- Optional,
- Protocol,
- Sequence,
- Tuple,
- Type,
- Union,
-)
-
-import datasets # type: ignore[import]
-import torch
-from accelerate import cpu_offload_with_hook # type: ignore
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-from transformers import ( # type: ignore
- AutoConfig,
- AutoTokenizer,
- PretrainedConfig,
- PreTrainedModel,
- PreTrainedTokenizer,
-)
-
-from friendli.errors import (
- InvalidConfigError,
- NotFoundError,
- QuantizationError,
- TokenizerNotFoundError,
-)
-from friendli.logging import logger
-from friendli.modules.quantizer_v2.enums import ModelDataType
-from friendli.modules.quantizer_v2.schema.config import CalibrationDatasetConfig
-from friendli.modules.quantizer_v2.schema.data import (
- ModuleName,
- WeightActQuantResult,
- WeightOnlyQuantResult,
-)
-
-
-def get_tokenizer(
- model_name_or_path: str,
- *,
- cache_dir: Optional[str] = None,
-) -> PreTrainedTokenizer:
- """Try to get tokenizer of a pretrained model."""
- try:
- tokenizer = AutoTokenizer.from_pretrained(
- model_name_or_path,
- cache_dir=cache_dir,
- trust_remote_code=True,
- )
- except OSError as exc:
- raise TokenizerNotFoundError(str(exc)) from exc
-
- if not tokenizer.is_fast:
- raise TokenizerNotFoundError(
- "This model does not support Friendli-compatible tokenizer"
- )
-
- if tokenizer.pad_token != "<unk>":
- tokenizer.pad_token = tokenizer.eos_token
- if tokenizer.pad_token is None:
- tokenizer.pad_token = tokenizer.eos_token
-
- return tokenizer
-
-
-def save_tokenizer(
- model_name_or_path: str,
- *,
- cache_dir: Optional[str] = None,
- save_dir: str,
-) -> Tuple[str, ...]:
- """Try to save `tokenizer.json` of a pretrained model."""
- if not os.path.isdir(save_dir):
- raise NotFoundError(f"Directory '{save_dir}' is not found.")
-
- tokenizer = get_tokenizer(model_name_or_path, cache_dir=cache_dir)
- saved_file_paths = tokenizer.save_pretrained(save_directory=save_dir)
- tokenizer_json_path = None
- for path in saved_file_paths:
- if "tokenizer.json" == os.path.basename(path):
- tokenizer_json_path = path
- break
-
- if tokenizer_json_path is None:
- raise TokenizerNotFoundError(
- "This model has the Friendli-compatible tokenizer implementation, but "
- "'tokenizer.json' file is not found."
- )
- return saved_file_paths
-
-
-def get_model_pretrained_config(
- model_name_or_path: str, model_output_path: str, cache_dir: Optional[str] = None
-) -> PretrainedConfig:
- """Get HuggingFace model configs."""
- try:
- config = AutoConfig.from_pretrained(
- model_name_or_path, cache_dir=cache_dir, trust_remote_code=True
- )
- except OSError as exc: # from AutoConfig.from_pretrained()
- config_dir = Path(model_name_or_path)
- model_output_dir = Path(model_output_path).parent
- if config_dir.exists() and model_output_dir.absolute() == config_dir.absolute():
- raise NotFoundError(
- f"'output_dir' ({model_output_dir.as_posix()}) and "
- f"'model_name_or_path' ({model_name_or_path}) are the same. "
- "In such a case, checkpoints should be prepared in 'output_dir'."
- ) from exc
- raise NotFoundError(str(exc)) from exc
-
- return config
-
-
-def safe_load_datasets(data_cfg: CalibrationDatasetConfig) -> datasets.Dataset:
- """Load dataset from calibration dataset config."""
- data_path = data_cfg.path_or_name
- data_split = data_cfg.split
-
- try:
- if os.path.exists(data_path):
- dataset = datasets.load_dataset(
- data_cfg.format,
- data_files=data_path,
- split=data_split,
- )
- else:
- data_name_parts = data_path.split(":")
- if len(data_name_parts) == 1:
- dataset = datasets.load_dataset(data_path, split=data_split)
- elif len(data_name_parts) == 2:
- data_name, subset_name = data_name_parts
- dataset = datasets.load_dataset(
- data_name, subset_name, split=data_split
- )
- else:
- raise InvalidConfigError(
- "Dataset name is in invalid format. "
- "(valid format: '' or ':')"
- )
- except ValueError as err:
- raise QuantizationError(f"datasets.load_dataset failed. {str(err)}") from err
-
- if not isinstance(dataset, datasets.Dataset):
- raise InvalidConfigError(
- "This dataset format is not supported for the calibration."
- )
-
- return dataset
-
-
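
The `path_or_name` convention handled above is either a local file, a dataset name, or `name:subset`. A short sketch using the default calibration config (the split call only illustrates how `safe_load_datasets` interprets the string):

    cfg = CalibrationDatasetConfig()                 # path_or_name="cnn_dailymail:3.0.0"
    name, subset = cfg.path_or_name.split(":")       # -> ("cnn_dailymail", "3.0.0")
    # safe_load_datasets(cfg) then effectively calls
    # datasets.load_dataset("cnn_dailymail", "3.0.0", split="validation")
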
-def build_percentile_statistics(
- scale_percentile: float,
- symmetric: bool = True,
-) -> Tuple[Callable, Callable, Callable]:
- """Builds the hooks for getting the max input and output activations of a model."""
- logger.info(
- "Building percentile statistics hooks. scale_percentile: (%s)",
- scale_percentile,
- )
-
- max_input_M1: Dict[str, torch.Tensor] = {}
- max_input_M2: Dict[str, torch.Tensor] = {}
- max_input_num: Dict[str, torch.Tensor] = {}
- max_output_M1: Dict[str, torch.Tensor] = {}
- max_output_M2: Dict[str, torch.Tensor] = {}
- max_output_num: Dict[str, torch.Tensor] = {}
-
- def create_hook(name: ModuleName):
- def update_stats(
- max_M1: Dict[str, torch.Tensor],
- max_M2: Dict[str, torch.Tensor],
- max_num: Dict[str, int],
- new_t: torch.Tensor,
- ) -> None:
- # Chan's method for computing mean and variance incrementally
- new_t = new_t.detach().reshape(-1, new_t.size(-1))
- new_numel = new_t.size(0)
- new_t_M1 = new_t.to(torch.float64).mean(dim=0)
- if symmetric:
- # it is assumed samples are always centered on zero
- # in the symmetric quantization scheme
- new_t_M1.zero_()
- new_t_M2 = ((new_t.to(torch.float64) - new_t_M1) ** 2).sum(dim=0)
- try:
- pre_numel = max_num[name]
- max_num[name] += new_numel
- delta = new_t_M1 - max_M1[name]
- max_M1[name] += delta * (new_numel / max_num[name])
- max_M2[name] += new_t_M2 + torch.pow(delta, 2) * (
- pre_numel * new_numel / max_num[name]
- )
- except KeyError:
- max_num[name] = new_numel
- max_M1[name] = new_t_M1
- max_M2[name] = new_t_M2
-
- def hook(module, in_t_tup, out_t): # pylint: disable=unused-argument
- with torch.no_grad():
- in_t = in_t_tup[0]
- update_stats(max_input_M1, max_input_M2, max_input_num, in_t)
- update_stats(max_output_M1, max_output_M2, max_output_num, out_t)
-
- return hook
-
- def finish_input_stats():
- return {
- name: torch.distributions.Normal(
- loc=max_input_M1[name],
- scale=torch.sqrt(max_input_M2[name] / max_input_num[name]).clip(
- min=1e-7
- ),
- ).icdf(
- torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to(
- max_input_M1[name].device
- )
- )
- for name in list(max_input_M1.keys())
- }
-
- def finish_output_stats():
- return {
- name: torch.distributions.Normal(
- loc=max_output_M1[name],
- scale=torch.sqrt(max_output_M2[name] / max_output_num[name]).clip(
- min=1e-7
- ),
- ).icdf(
- torch.Tensor([(scale_percentile / 100.0) * 0.5 + 0.5]).to(
- max_output_M1[name].device
- )
- )
- for name in list(max_output_M1.keys())
- }
-
- return finish_input_stats, finish_output_stats, create_hook
-
-
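
The incremental update inside `update_stats` is Chan's parallel formula for merging per-batch means and squared deviations. A small numeric check of the general (non-symmetric) form, outside this patch, verifying it against a single-pass computation:

    import torch

    torch.manual_seed(0)
    batches = [torch.randn(32, 4) for _ in range(5)]

    m1 = torch.zeros(4, dtype=torch.float64)   # running mean
    m2 = torch.zeros(4, dtype=torch.float64)   # running sum of squared deviations
    n = 0
    for b in batches:
        b = b.to(torch.float64)
        bn = b.size(0)
        b_m1 = b.mean(dim=0)
        b_m2 = ((b - b_m1) ** 2).sum(dim=0)
        delta = b_m1 - m1
        total = n + bn
        m1 = m1 + delta * (bn / total)
        m2 = m2 + b_m2 + delta.pow(2) * (n * bn / total)
        n = total

    full = torch.cat(batches).to(torch.float64)
    assert torch.allclose(m1, full.mean(dim=0))
    assert torch.allclose(m2 / n, full.var(dim=0, unbiased=False))
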
-def build_max_statistics() -> Tuple[Callable, Callable, Callable]:
- """Builds the hooks for getting the max input and output activations of a model."""
- logger.info("Building max statistics hooks")
- max_input_stats: Dict[str, torch.Tensor] = {}
- max_output_stats: Dict[str, torch.Tensor] = {}
-
- def create_hook(name: ModuleName):
- def hook(modules, in_t_tup, out_t): # pylint: disable=unused-argument
- in_t = in_t_tup[0]
- in_t = (
- in_t.detach().abs().reshape(-1, in_t.size(-1)).max(dim=0).values
- ) # reduce-max only leaving the hidden dim (supposing the last dim is the hidden dim)
- out_t = out_t.detach().reshape(-1, out_t.size(-1))
- out_t = out_t.abs().max(dim=0).values
- try:
- max_input_stats[name] = torch.maximum(max_input_stats[name], in_t)
- except KeyError:
- max_input_stats[name] = in_t
- try:
- max_output_stats[name] = torch.maximum(max_output_stats[name], out_t)
- except KeyError:
- max_output_stats[name] = out_t
-
- return hook
-
- def finish_input_stats():
- return max_input_stats
-
- def finish_output_stats():
- return max_output_stats
-
- return finish_input_stats, finish_output_stats, create_hook
-
-
-@torch.no_grad()
-def collect_stats(
- model: PreTrainedModel,
- device: str,
- calib_dataloader: DataLoader,
- target_classes: Tuple[Type[torch.nn.Module], ...],
- tqdm_desc: str,
- percentile: float,
-) -> Tuple[Dict[ModuleName, torch.Tensor], Dict[ModuleName, torch.Tensor]]:
- """Collects the maximum values of input and output activations of a specific model.
-
- Args:
- model (torch.nn.Module): The model for which we want to collect the max statistics.
- device (str): Device on which the calibration forward passes run.
- calib_dataloader (DataLoader): Dataloader that yields the calibration input tensors.
- target_classes (Tuple[Type[torch.nn.Module], ...]): A tuple of the target classes.
- tqdm_desc (str): Description shown on the progress bar.
- percentile (float): Percentile for the activation statistics; 100.0 falls back to plain max.
-
- Returns:
- A tuple of two dictionaries: (max_input_stats, max_output_stats), where:
- max_input_stats: The maximum input activation values for each module of the model.
- max_output_stats: The maximum output activation values for each module of the model.
-
- This function uses forward hooks to capture the maximum input and output activation values
- of modules matching the specified target_classes while the calibration dataloader is run
- through the model.
-
- The function returns two dictionaries containing the maximum input and output activation
- values for each module of the model, respectively. These dictionaries can be used to calculate
- scaling factors for weight quantization and activation smoothing.
-
- """
- # pylint: disable=too-many-locals
- max_input_stats, max_output_stats, create_hook = (
- build_percentile_statistics(percentile)
- if percentile < 100.0
- else build_max_statistics()
- )
- name_mods = [
- (name, module)
- for name, module in model.named_modules()
- if isinstance(module, target_classes)
- ]
-
- removables = []
- for name, module in name_mods:
- removables.append(module.register_forward_hook(create_hook(name)))
- try:
- for inputs in tqdm(calib_dataloader, desc=tqdm_desc):
- model(inputs.to(device))
- finally:
- for removable in removables:
- removable.remove()
- return max_input_stats(), max_output_stats()
-
-
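
`collect_stats` only needs a torch model and a dataloader of input tensors, so it can be exercised without a real Hugging Face checkpoint. A hedged sketch with a tiny stand-in model:

    import torch
    from torch.utils.data import DataLoader

    model = torch.nn.Sequential(torch.nn.Embedding(100, 8), torch.nn.Linear(8, 8))
    calib_loader = DataLoader(
        [torch.randint(0, 100, (16,)) for _ in range(4)], batch_size=1
    )
    max_in, max_out = collect_stats(
        model, "cpu", calib_loader, (torch.nn.Linear,),
        tqdm_desc="calibrating", percentile=100.0,
    )
    print(list(max_in))   # -> ['1'], the name of the Linear submodule
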
-def convert_tensor_to_quant_dtype(
- param: torch.Tensor,
- quant_dtype: ModelDataType,
-) -> torch.Tensor:
- """Convert tensor format to the given data type.
-
- Args:
- param (torch.Tensor): The tensor to be converted.
- quant_dtype (ModelDataType): The quantization data type to convert to.
-
- Returns:
- torch.Tensor: The converted tensor.
-
- """
- assert quant_dtype in [ModelDataType.INT4, ModelDataType.INT8]
- if quant_dtype is ModelDataType.INT4:
- pack_num = 8 // 4
- int4_param = torch.zeros(
- (param.shape[0], param.shape[1] // pack_num),
- dtype=torch.uint8,
- device=param.device,
- )
- for col in range(int4_param.shape[1]):
- for i in range(pack_num):
- int4_param[:, col] |= param[:, col * pack_num + i] << (i * 4)
- param = int4_param.to(torch.int8)
-
- elif quant_dtype is ModelDataType.INT8:
- param = param.to(torch.int8)
-
- return param.detach().to("cpu")
-
-
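
For INT4, the loop above packs two signed 4-bit values into each byte, low nibble first, before the cast to `int8`. A stand-alone round-trip sketch of that layout (not part of the patch):

    import torch

    vals = torch.tensor([[-8, 7, -1, 3]], dtype=torch.int8)   # 4-bit range [-8, 7]
    packed = torch.zeros((1, 2), dtype=torch.uint8)
    for col in range(packed.shape[1]):
        for i in range(2):
            packed[:, col] |= (vals[:, col * 2 + i].to(torch.uint8) & 0xF) << (i * 4)

    # Unpack each nibble and sign-extend it back to int8.
    low = (packed & 0xF).to(torch.int8)
    high = ((packed >> 4) & 0xF).to(torch.int8)
    low = torch.where(low >= 8, low - 16, low)
    high = torch.where(high >= 8, high - 16, high)
    assert torch.equal(torch.stack([low, high], dim=-1).reshape(1, -1), vals)
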
-@torch.no_grad()
-def get_weight_act_quant_scales(
- model: PreTrainedModel,
- layer_names: List[str],
- max_input_stats: Dict[ModuleName, torch.Tensor],
- device: str = "cpu",
- quant_dtype: ModelDataType = ModelDataType.INT8,
- quant_scale_dtype: ModelDataType = ModelDataType.FP32,
-) -> List[WeightActQuantResult]:
- """Get the quantization scales and int8 weight for a specific layer."""
- input_max = torch.concat([max_input_stats[name] for name in layer_names])
- target_weights = [model.get_submodule(name).weight for name in layer_names]
- target_weight = torch.concat(target_weights)
-
- max_val = 2 ** (8 - 1) - 1
- min_val = -(2 ** (8 - 1))
-
- act_scale = float(input_max.detach().abs().max().item()) / float(max_val)
- weight_scale = float(target_weight.detach().abs().max().item()) / float(max_val)
-
- q_weights = [
- (
- convert_tensor_to_quant_dtype(
- (weight.detach().float() / weight_scale).clip(min_val, max_val),
- quant_dtype,
- ).to(device)
- )
- for weight in target_weights
- ]
- quant_scale_torch_dtype = get_torch_data_type(quant_scale_dtype)
- return [
- WeightActQuantResult(
- act_scale=torch.tensor(act_scale, dtype=quant_scale_torch_dtype),
- weight_scale=torch.tensor(weight_scale, dtype=quant_scale_torch_dtype),
- q_weight=q_weight,
- q_group_size=-1,
- zero_point=None,
- )
- for _, q_weight in zip(layer_names, q_weights)
- ]
-
-
-def get_weight_only_quant_scales(
- model: PreTrainedModel,
- layer_names: List[str],
- quant_dtype: ModelDataType,
- quant_scale_dtype: ModelDataType,
- q_group_size: int = -1,
- use_symmetric: bool = True,
- device: Union[str, torch.device] = "cpu",
-) -> List[WeightOnlyQuantResult]:
- """Return the quantization scales of weight for a specific layer."""
- # pylint: disable=too-many-locals
- assert quant_dtype in [ModelDataType.INT4, ModelDataType.INT8]
- q_bit = 4 if quant_dtype == ModelDataType.INT4 else 8
- target_weights = [model.get_submodule(name).weight for name in layer_names]
- org_w_shape = target_weights[0].shape # [OutDim, InDim]
- w = torch.concat(target_weights)
-
- if q_group_size != -1:
- w = w.reshape(-1, q_group_size) # [OutDim x num_groups, group_size]
-
- if use_symmetric:
- max_val = w.abs().amax(dim=1, keepdim=True)
- max_int = 2 ** (q_bit - 1) - 1
- min_int = -(2 ** (q_bit - 1))
- scales = (max_val / float(max_int)).clamp(min=1e-5)
- zeros = torch.zeros_like(max_val)
- else:
- max_val = w.amax(dim=1, keepdim=True)
- min_val = w.amin(dim=1, keepdim=True)
- max_int = 2**q_bit - 1
- min_int = 0
-
- scales = (max_val - min_val).clamp(min=1e-5) / max_int
- zeros = (-torch.round(min_val / scales)).clamp_(min_int, max_int)
-
- q_weights = [
- convert_tensor_to_quant_dtype(
- torch.clamp(torch.round(w / scales) + zeros, min_int, max_int)
- .reshape(org_w_shape)
- .detach(),
- quant_dtype,
- ).to(device)
- for w in target_weights
- ]
- quant_scale_torch_dtype = get_torch_data_type(quant_scale_dtype)
- scales = (
- scales.view(org_w_shape[0], -1).detach().transpose(0, 1).to(device)
- ) # [num_groups, OutDim]
- zeros = (
- zeros.view(org_w_shape[0], -1).detach().transpose(0, 1).to(device)
- ) # [num_groups, OutDim]
-
- if q_group_size == -1:
- scales = scales.squeeze(0)
- zeros = zeros.squeeze(0)
-
- return [
- WeightOnlyQuantResult(
- zero_point=None if use_symmetric else zeros.to(quant_scale_torch_dtype),
- q_group_size=q_group_size,
- weight_scale=scales.to(quant_scale_torch_dtype),
- q_weight=q_weight,
- )
- for q_weight in q_weights
- ]
-
-
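
In the symmetric, ungrouped branch above, each output channel gets a single scale max|w| / 127. A tiny worked example of that computation and a round-trip check of the resulting error:

    import torch

    w = torch.tensor([[0.5, -1.0], [2.0, 0.25]])
    max_int = 2 ** (8 - 1) - 1                        # 127 for INT8
    scales = w.abs().amax(dim=1, keepdim=True) / max_int
    q = torch.clamp(torch.round(w / scales), -128, 127)
    dequant = q * scales
    assert torch.allclose(dequant, w, atol=scales.max().item())   # error within one scale step
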
-def get_model_dtype(torch_dtype: torch.dtype) -> ModelDataType:
- """Get torch data type from Enum."""
- if torch_dtype == torch.float16:
- return ModelDataType.FP16
- if torch_dtype == torch.float32:
- return ModelDataType.FP32
- if torch_dtype == torch.bfloat16:
- return ModelDataType.BF16
- raise QuantizationError(f"{torch_dtype} is not valid dtype for hf model dtype.")
-
-
-def get_torch_data_type(data_type: str) -> torch.dtype:
- """Get torch data type from Enum."""
- if data_type == ModelDataType.FP16:
- return torch.float16
- if data_type == ModelDataType.FP32:
- return torch.float32
- if data_type == ModelDataType.BF16:
- return torch.bfloat16
- raise QuantizationError(
- f"Cannot convert the original param to {data_type}. Only FP16, FP32, and BF16 are supported."
- )
-
-
-def send_model_to_device(
- model: PreTrainedModel,
- device: Union[str, torch.device],
- *,
- exclude: Iterable[torch.nn.Module] = (),
-):
- """Send the model and its submodules onto device except for modules designated by `exclude`."""
- exclude_set = set(exclude)
-
- @torch.no_grad()
- def recurse(m: torch.nn.Module):
- if m in exclude_set:
- return
- for name, p in list(m.named_parameters(recurse=False)):
- m.register_parameter(name, torch.nn.Parameter(p.to(device)))
- for name, b in list(m.named_buffers(recurse=False)):
- m.register_buffer(name, b.to(device))
-
- for child in m.children():
- recurse(child)
-
- recurse(model)
-
-
-class RemovableOffloaderHook(Protocol):
- """Hook protocol for cpu offloader."""
-
- def offload(self) -> None:
- """Offload the associated block onto CPU."""
-
- def remove(self) -> None:
- """Remove this hook."""
-
-
-@contextmanager
-def offload_module_sequence(
- blocks: Sequence[torch.nn.Module], device: Union[str, torch.device]
-):
- """Offload a sequence of torch modules automatically.
-
- All blocks are assumed to reside on CPU at the start.
- When the i-th block is called, it is loaded onto `device` on the fly,
- and the (i-1)-th block is offloaded back to CPU at the same time.
- """
- module_hooks: List[RemovableOffloaderHook] = []
- if blocks:
- prev_module_hook = None
- for tf_block in blocks:
- _, module_hook = cpu_offload_with_hook(
- tf_block, device, prev_module_hook=prev_module_hook
- )
- prev_module_hook = module_hook
- module_hooks.append(module_hook)
- try:
- yield
- finally:
- for hook in module_hooks:
- hook.offload()
- for hook in module_hooks:
- hook.remove()
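
A hedged usage sketch of the context manager above: blocks start on CPU and each one is moved to the execution device only for its own forward pass. The block list here is a stand-in for real transformer blocks:

    import torch

    blocks = [torch.nn.Linear(8, 8) for _ in range(3)]   # stand-in tf blocks
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    x = torch.randn(1, 8)
    with offload_module_sequence(blocks, device):
        for block in blocks:
            x = block(x)
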
diff --git a/friendli/sdk/api/base.py b/friendli/sdk/api/base.py
index 2e6aad86..8c803c50 100644
--- a/friendli/sdk/api/base.py
+++ b/friendli/sdk/api/base.py
@@ -24,7 +24,12 @@
from friendli.auth import get_auth_header
from friendli.errors import APIError
-from friendli.utils.request import DEFAULT_REQ_TIMEOUT, transform_request_data
+from friendli.utils.request import (
+ DEFAULT_CONNECTION_LIMITS,
+ DEFAULT_REQ_TIMEOUT,
+ DEFAULT_TIMEOUT,
+ transform_request_data,
+)
_GenerationLine = TypeVar("_GenerationLine", bound=BaseModel)
@@ -93,10 +98,26 @@ async def __anext__(self) -> _GenerationLine:
_ProtoMsgType = TypeVar("_ProtoMsgType", bound=Type[pb_message.Message])
+class _DefaultHttpxClient(httpx.Client):
+ def __init__(self, **kwargs: Any) -> None:
+ kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
+ kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS)
+ kwargs.setdefault("follow_redirects", True)
+ super().__init__(**kwargs)
+
+
+class _DefaultAsyncHttpxClient(httpx.AsyncClient):
+ def __init__(self, **kwargs: Any) -> None:
+ kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
+ kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS)
+ kwargs.setdefault("follow_redirects", True)
+ super().__init__(**kwargs)
+
+
class BaseAPI(ABC, Generic[_HttpxClient, _ProtoMsgType]):
"""Base API interface."""
- _client: _HttpxClient
+ _http_client: _HttpxClient
def __init__(
self,
@@ -133,13 +154,12 @@ def _build_http_request(
self, data: dict[str, Any], model: Optional[str] = None
) -> httpx.Request:
"""Build request."""
- return self._client.build_request(
+ return self._http_client.build_request(
method=self._method,
url=self._build_http_url(),
content=self._build_content(data, model),
files=self._build_files(data),
headers=self._get_headers(),
- timeout=DEFAULT_REQ_TIMEOUT,
)
def _build_http_url(self) -> httpx.URL:
@@ -213,7 +233,7 @@ def __init__(
endpoint_id: Optional[str] = None,
use_protobuf: bool = False,
use_grpc: bool = False,
- client: Optional[httpx.Client] = None,
+ http_client: Optional[httpx.Client] = None,
grpc_channel: Optional[grpc.Channel] = None,
) -> None:
"""Initializes ServingAPI."""
@@ -224,7 +244,7 @@ def __init__(
)
self._use_grpc = use_grpc
- self._client = client or httpx.Client()
+ self._http_client = http_client or _DefaultHttpxClient()
self._grpc_channel = grpc_channel
self._grpc_stub = None
@@ -240,7 +260,7 @@ def close(self) -> None:
"""Close the gRPC channel and HTTP client."""
if self._grpc_channel:
self._grpc_channel.close()
- self._client.close()
+ self._http_client.close()
def _get_grpc_stub(self, channel: grpc.Channel) -> Any:
raise NotImplementedError # pragma: no cover
@@ -274,7 +294,7 @@ def _request(
return grpc_response
http_request = self._build_http_request(data=data, model=model)
- http_response = self._client.send(request=http_request, stream=stream)
+ http_response = self._http_client.send(request=http_request, stream=stream)
self._check_http_error(http_response)
return http_response
@@ -302,7 +322,7 @@ def __init__(
endpoint_id: Optional[str] = None,
use_protobuf: bool = False,
use_grpc: bool = False,
- client: Optional[httpx.AsyncClient] = None,
+ http_client: Optional[httpx.AsyncClient] = None,
grpc_channel: Optional[grpc.aio.Channel] = None,
) -> None:
"""Initializes AsyncServingAPI."""
@@ -311,7 +331,7 @@ def __init__(
)
self._use_grpc = use_grpc
- self._client = client or httpx.AsyncClient()
+ self._http_client = http_client or _DefaultAsyncHttpxClient()
self._grpc_channel = grpc_channel
self._grpc_stub = None
@@ -327,7 +347,7 @@ async def close(self) -> None:
"""Close the gRPC channel and HTTP client."""
if self._grpc_channel:
await self._grpc_channel.close(grace=None)
- await self._client.aclose()
+ await self._http_client.aclose()
def _get_grpc_stub(self, channel: grpc.aio.Channel) -> Any:
raise NotImplementedError # pragma: no cover
@@ -363,7 +383,9 @@ async def _request(
return grpc_response
http_request = self._build_http_request(data=data, model=model)
- http_response = await self._client.send(request=http_request, stream=stream)
+ http_response = await self._http_client.send(
+ request=http_request, stream=stream
+ )
await self._check_http_error(http_response)
return http_response
diff --git a/friendli/sdk/api/chat/chat.py b/friendli/sdk/api/chat/chat.py
index 9741794d..7351ac01 100644
--- a/friendli/sdk/api/chat/chat.py
+++ b/friendli/sdk/api/chat/chat.py
@@ -24,7 +24,7 @@ def __init__(
endpoint_id: Optional[str] = None,
use_protobuf: bool = False,
use_grpc: bool = False,
- client: Optional[httpx.Client] = None,
+ http_client: Optional[httpx.Client] = None,
grpc_channel: Optional[grpc.Channel] = None,
) -> None:
"""Initializes Chat."""
@@ -33,7 +33,7 @@ def __init__(
endpoint_id=endpoint_id,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
- client=client,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
@@ -53,7 +53,7 @@ def __init__(
endpoint_id: Optional[str] = None,
use_protobuf: bool = False,
use_grpc: bool = False,
- client: Optional[httpx.AsyncClient] = None,
+ http_client: Optional[httpx.AsyncClient] = None,
grpc_channel: Optional[grpc.aio.Channel] = None,
) -> None:
"""Initializes AsyncChat."""
@@ -62,7 +62,7 @@ def __init__(
endpoint_id=endpoint_id,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
- client=client,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
diff --git a/friendli/sdk/api/images/images.py b/friendli/sdk/api/images/images.py
index 1901c501..fb58e658 100644
--- a/friendli/sdk/api/images/images.py
+++ b/friendli/sdk/api/images/images.py
@@ -20,11 +20,11 @@ def __init__(
self,
base_url: Optional[str] = None,
endpoint_id: Optional[str] = None,
- client: Optional[httpx.Client] = None,
+ http_client: Optional[httpx.Client] = None,
) -> None:
"""Initialize Images."""
self.text_to_image = TextToImage(
- base_url=base_url, endpoint_id=endpoint_id, client=client
+ base_url=base_url, endpoint_id=endpoint_id, http_client=http_client
)
def close(self) -> None:
@@ -41,11 +41,11 @@ def __init__(
self,
base_url: Optional[str] = None,
endpoint_id: Optional[str] = None,
- client: Optional[httpx.AsyncClient] = None,
+ http_client: Optional[httpx.AsyncClient] = None,
) -> None:
"""Initialize Images."""
self.text_to_image = AsyncTextToImage(
- base_url=base_url, endpoint_id=endpoint_id, client=client
+ base_url=base_url, endpoint_id=endpoint_id, http_client=http_client
)
async def close(self) -> None:
diff --git a/friendli/sdk/client.py b/friendli/sdk/client.py
index 6ef97e53..6930641c 100644
--- a/friendli/sdk/client.py
+++ b/friendli/sdk/client.py
@@ -8,6 +8,7 @@
import grpc
import grpc.aio
+import httpx
import friendli
from friendli.client.graphql.endpoint import EndpointGqlClient
@@ -34,6 +35,7 @@ def __init__(
base_url: Optional[str] = None,
use_protobuf: bool = False,
use_grpc: bool = False,
+ http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None,
grpc_channel: Optional[Union[grpc.Channel, grpc.aio.Channel]] = None,
):
"""Initializes FriendliClientBase."""
@@ -52,6 +54,8 @@ def __init__(
raise ValueError(
"One of `base_url` and `grpc_channel` should be set when `use_grpc=True`."
)
+ if http_client is not None:
+ raise ValueError("You cannot use HTTP client when `use_grpc=True`.")
else:
if grpc_channel is not None:
raise ValueError(
@@ -80,6 +84,7 @@ def __init__(
base_url: Optional[str] = None,
use_protobuf: bool = False,
use_grpc: bool = False,
+ http_client: Optional[httpx.Client] = None,
grpc_channel: Optional[grpc.Channel] = None,
):
"""Initializes Friendli."""
@@ -91,6 +96,7 @@ def __init__(
base_url=base_url,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
@@ -99,6 +105,7 @@ def __init__(
endpoint_id=self._endpoint_id,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
self.chat = Chat(
@@ -106,9 +113,14 @@ def __init__(
endpoint_id=self._endpoint_id,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
- self.images = Images(base_url=self._base_url, endpoint_id=self._endpoint_id)
+ self.images = Images(
+ base_url=self._base_url,
+ endpoint_id=self._endpoint_id,
+ http_client=http_client,
+ )
endpoint_client = EndpointGqlClient()
model_client = ModelGqlClient()
@@ -147,6 +159,7 @@ def __init__(
base_url: Optional[str] = None,
use_protobuf: bool = False,
use_grpc: bool = False,
+ http_client: Optional[httpx.AsyncClient] = None,
grpc_channel: Optional[grpc.aio.Channel] = None,
):
"""Initializes AsyncFriendli."""
@@ -158,6 +171,7 @@ def __init__(
base_url=base_url,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
@@ -166,6 +180,7 @@ def __init__(
endpoint_id=self._endpoint_id,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
self.chat = AsyncChat(
@@ -173,10 +188,13 @@ def __init__(
endpoint_id=self._endpoint_id,
use_protobuf=use_protobuf,
use_grpc=use_grpc,
+ http_client=http_client,
grpc_channel=grpc_channel,
)
self.images = AsyncImages(
- base_url=self._base_url, endpoint_id=self._endpoint_id
+ base_url=self._base_url,
+ endpoint_id=self._endpoint_id,
+ http_client=http_client,
)
async def __aenter__(self) -> AsyncFriendli:
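
With the `http_client` parameter threaded through above, callers can inject a pre-configured `httpx` client instead of relying on the library defaults (and a ValueError is raised if one is passed together with `use_grpc=True`). A hedged usage sketch; authentication arguments are omitted here:

    import httpx
    from friendli.sdk.client import Friendli

    custom_http = httpx.Client(
        timeout=httpx.Timeout(60.0, connect=5.0),
        limits=httpx.Limits(max_connections=50),
    )
    client = Friendli(http_client=custom_http)
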
diff --git a/friendli/utils/request.py b/friendli/utils/request.py
index 49157af5..9fb9e6bf 100644
--- a/friendli/utils/request.py
+++ b/friendli/utils/request.py
@@ -6,14 +6,19 @@
from typing import Any
+import httpx
import pydantic
from requests.exceptions import HTTPError
from friendli.utils.compat import model_dump
from friendli.utils.url import discuss_url
-DEFAULT_REQ_TIMEOUT = 30
+DEFAULT_REQ_TIMEOUT = 600.0
MAX_RETRIES = 3
+DEFAULT_TIMEOUT = httpx.Timeout(timeout=DEFAULT_REQ_TIMEOUT, connect=5.0)
+DEFAULT_CONNECTION_LIMITS = httpx.Limits(
+ max_connections=1000, max_keepalive_connections=100
+)
def decode_http_err(exc: HTTPError) -> str:
diff --git a/friendli/utils/validate.py b/friendli/utils/validate.py
index 8557d666..0e395266 100644
--- a/friendli/utils/validate.py
+++ b/friendli/utils/validate.py
@@ -6,7 +6,6 @@
from datetime import datetime
from enum import Enum
-from importlib.util import find_spec
from typing import Any, Dict, Optional, Type
import typer
@@ -81,16 +80,3 @@ def validate_enums(val: Any, enum_cls: Type[Enum]) -> Any:
raise InvalidConfigError(
f"Invalid value. Please provide one of {supported_values}"
) from exc
-
-
-def validate_convert_imports() -> None:
- """Validate the import modules for checkpoint conversion."""
- if find_spec("torch") is None:
- raise ModuleNotFoundError(
- "To convert the checkpoint, you must install 'torch'."
- )
- if find_spec("transformers") is None or find_spec("accelerate") is None:
- raise ModuleNotFoundError(
- "To convert the checkpoint,"
- " your must install the package with 'pip install \"friendli-client[mllib]\"'"
- )
diff --git a/friendli/utils/version.py b/friendli/utils/version.py
index 55cf390b..ba7e9574 100644
--- a/friendli/utils/version.py
+++ b/friendli/utils/version.py
@@ -48,8 +48,3 @@ def check_dependencies_installed(deps: List[str]) -> bool:
return False
return True
-
-
-def check_extras_installed() -> bool:
- """Check extra package dependencies are installed."""
- return check_dependencies_installed(["torch", "transformers"])
diff --git a/poetry.lock b/poetry.lock
index 763e0739..b2a31c21 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,142 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
-
-[[package]]
-name = "accelerate"
-version = "0.21.0"
-description = "Accelerate"
-optional = true
-python-versions = ">=3.8.0"
-files = [
- {file = "accelerate-0.21.0-py3-none-any.whl", hash = "sha256:e2609d37f2c6a56e36a0612feae6ff6d9daac9759f4899432b86b1dc97024ebb"},
- {file = "accelerate-0.21.0.tar.gz", hash = "sha256:e2959a0bf74d97c0b3c0e036ed96065142a060242281d27970d4c4e34f11ca59"},
-]
-
-[package.dependencies]
-numpy = ">=1.17"
-packaging = ">=20.0"
-psutil = "*"
-pyyaml = "*"
-torch = ">=1.10.0"
-
-[package.extras]
-dev = ["black (>=23.1,<24.0)", "datasets", "deepspeed", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.0.241)", "scikit-learn", "scipy", "tqdm", "transformers", "urllib3 (<2.0.0)"]
-quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.0.241)", "urllib3 (<2.0.0)"]
-rich = ["rich"]
-sagemaker = ["sagemaker"]
-test-dev = ["datasets", "deepspeed", "evaluate", "scikit-learn", "scipy", "tqdm", "transformers"]
-test-prod = ["parameterized", "pytest", "pytest-subtests", "pytest-xdist"]
-test-trackers = ["comet-ml", "tensorboard", "wandb"]
-testing = ["datasets", "deepspeed", "evaluate", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "tqdm", "transformers"]
-
-[[package]]
-name = "aiohttp"
-version = "3.9.3"
-description = "Async http client/server framework (asyncio)"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:939677b61f9d72a4fa2a042a5eee2a99a24001a67c13da113b2e30396567db54"},
- {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f5cd333fcf7590a18334c90f8c9147c837a6ec8a178e88d90a9b96ea03194cc"},
- {file = "aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82e6aa28dd46374f72093eda8bcd142f7771ee1eb9d1e223ff0fa7177a96b4a5"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f56455b0c2c7cc3b0c584815264461d07b177f903a04481dfc33e08a89f0c26b"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bca77a198bb6e69795ef2f09a5f4c12758487f83f33d63acde5f0d4919815768"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e083c285857b78ee21a96ba1eb1b5339733c3563f72980728ca2b08b53826ca5"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab40e6251c3873d86ea9b30a1ac6d7478c09277b32e14745d0d3c6e76e3c7e29"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df822ee7feaaeffb99c1a9e5e608800bd8eda6e5f18f5cfb0dc7eeb2eaa6bbec"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:acef0899fea7492145d2bbaaaec7b345c87753168589cc7faf0afec9afe9b747"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cd73265a9e5ea618014802ab01babf1940cecb90c9762d8b9e7d2cc1e1969ec6"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a78ed8a53a1221393d9637c01870248a6f4ea5b214a59a92a36f18151739452c"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:6b0e029353361f1746bac2e4cc19b32f972ec03f0f943b390c4ab3371840aabf"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7cf5c9458e1e90e3c390c2639f1017a0379a99a94fdfad3a1fd966a2874bba52"},
- {file = "aiohttp-3.9.3-cp310-cp310-win32.whl", hash = "sha256:3e59c23c52765951b69ec45ddbbc9403a8761ee6f57253250c6e1536cacc758b"},
- {file = "aiohttp-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:055ce4f74b82551678291473f66dc9fb9048a50d8324278751926ff0ae7715e5"},
- {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6b88f9386ff1ad91ace19d2a1c0225896e28815ee09fc6a8932fded8cda97c3d"},
- {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c46956ed82961e31557b6857a5ca153c67e5476972e5f7190015018760938da2"},
- {file = "aiohttp-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07b837ef0d2f252f96009e9b8435ec1fef68ef8b1461933253d318748ec1acdc"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad46e6f620574b3b4801c68255492e0159d1712271cc99d8bdf35f2043ec266"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ed3e046ea7b14938112ccd53d91c1539af3e6679b222f9469981e3dac7ba1ce"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:039df344b45ae0b34ac885ab5b53940b174530d4dd8a14ed8b0e2155b9dddccb"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7943c414d3a8d9235f5f15c22ace69787c140c80b718dcd57caaade95f7cd93b"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84871a243359bb42c12728f04d181a389718710129b36b6aad0fc4655a7647d4"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5eafe2c065df5401ba06821b9a054d9cb2848867f3c59801b5d07a0be3a380ae"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9d3c9b50f19704552f23b4eaea1fc082fdd82c63429a6506446cbd8737823da3"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:f033d80bc6283092613882dfe40419c6a6a1527e04fc69350e87a9df02bbc283"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:2c895a656dd7e061b2fd6bb77d971cc38f2afc277229ce7dd3552de8313a483e"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1f5a71d25cd8106eab05f8704cd9167b6e5187bcdf8f090a66c6d88b634802b4"},
- {file = "aiohttp-3.9.3-cp311-cp311-win32.whl", hash = "sha256:50fca156d718f8ced687a373f9e140c1bb765ca16e3d6f4fe116e3df7c05b2c5"},
- {file = "aiohttp-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:5fe9ce6c09668063b8447f85d43b8d1c4e5d3d7e92c63173e6180b2ac5d46dd8"},
- {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:38a19bc3b686ad55804ae931012f78f7a534cce165d089a2059f658f6c91fa60"},
- {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:770d015888c2a598b377bd2f663adfd947d78c0124cfe7b959e1ef39f5b13869"},
- {file = "aiohttp-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee43080e75fc92bf36219926c8e6de497f9b247301bbf88c5c7593d931426679"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52df73f14ed99cee84865b95a3d9e044f226320a87af208f068ecc33e0c35b96"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc9b311743a78043b26ffaeeb9715dc360335e5517832f5a8e339f8a43581e4d"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b955ed993491f1a5da7f92e98d5dad3c1e14dc175f74517c4e610b1f2456fb11"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504b6981675ace64c28bf4a05a508af5cde526e36492c98916127f5a02354d53"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fe5571784af92b6bc2fda8d1925cccdf24642d49546d3144948a6a1ed58ca5"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ba39e9c8627edc56544c8628cc180d88605df3892beeb2b94c9bc857774848ca"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e5e46b578c0e9db71d04c4b506a2121c0cb371dd89af17a0586ff6769d4c58c1"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:938a9653e1e0c592053f815f7028e41a3062e902095e5a7dc84617c87267ebd5"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:c3452ea726c76e92f3b9fae4b34a151981a9ec0a4847a627c43d71a15ac32aa6"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff30218887e62209942f91ac1be902cc80cddb86bf00fbc6783b7a43b2bea26f"},
- {file = "aiohttp-3.9.3-cp312-cp312-win32.whl", hash = "sha256:38f307b41e0bea3294a9a2a87833191e4bcf89bb0365e83a8be3a58b31fb7f38"},
- {file = "aiohttp-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:b791a3143681a520c0a17e26ae7465f1b6f99461a28019d1a2f425236e6eedb5"},
- {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ed621426d961df79aa3b963ac7af0d40392956ffa9be022024cd16297b30c8c"},
- {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f46acd6a194287b7e41e87957bfe2ad1ad88318d447caf5b090012f2c5bb528"},
- {file = "aiohttp-3.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:feeb18a801aacb098220e2c3eea59a512362eb408d4afd0c242044c33ad6d542"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f734e38fd8666f53da904c52a23ce517f1b07722118d750405af7e4123933511"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b40670ec7e2156d8e57f70aec34a7216407848dfe6c693ef131ddf6e76feb672"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdd215b7b7fd4a53994f238d0f46b7ba4ac4c0adb12452beee724ddd0743ae5d"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:017a21b0df49039c8f46ca0971b3a7fdc1f56741ab1240cb90ca408049766168"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99abf0bba688259a496f966211c49a514e65afa9b3073a1fcee08856e04425b"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:648056db9a9fa565d3fa851880f99f45e3f9a771dd3ff3bb0c048ea83fb28194"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8aacb477dc26797ee089721536a292a664846489c49d3ef9725f992449eda5a8"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:522a11c934ea660ff8953eda090dcd2154d367dec1ae3c540aff9f8a5c109ab4"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5bce0dc147ca85caa5d33debc4f4d65e8e8b5c97c7f9f660f215fa74fc49a321"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b4af9f25b49a7be47c0972139e59ec0e8285c371049df1a63b6ca81fdd216a2"},
- {file = "aiohttp-3.9.3-cp38-cp38-win32.whl", hash = "sha256:298abd678033b8571995650ccee753d9458dfa0377be4dba91e4491da3f2be63"},
- {file = "aiohttp-3.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:69361bfdca5468c0488d7017b9b1e5ce769d40b46a9f4a2eed26b78619e9396c"},
- {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0fa43c32d1643f518491d9d3a730f85f5bbaedcbd7fbcae27435bb8b7a061b29"},
- {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:835a55b7ca49468aaaac0b217092dfdff370e6c215c9224c52f30daaa735c1c1"},
- {file = "aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06a9b2c8837d9a94fae16c6223acc14b4dfdff216ab9b7202e07a9a09541168f"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abf151955990d23f84205286938796c55ff11bbfb4ccfada8c9c83ae6b3c89a3"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59c26c95975f26e662ca78fdf543d4eeaef70e533a672b4113dd888bd2423caa"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f95511dd5d0e05fd9728bac4096319f80615aaef4acbecb35a990afebe953b0e"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595f105710293e76b9dc09f52e0dd896bd064a79346234b521f6b968ffdd8e58"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c8b816c2b5af5c8a436df44ca08258fc1a13b449393a91484225fcb7545533"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f1088fa100bf46e7b398ffd9904f4808a0612e1d966b4aa43baa535d1b6341eb"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f59dfe57bb1ec82ac0698ebfcdb7bcd0e99c255bd637ff613760d5f33e7c81b3"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:361a1026c9dd4aba0109e4040e2aecf9884f5cfe1b1b1bd3d09419c205e2e53d"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:363afe77cfcbe3a36353d8ea133e904b108feea505aa4792dad6585a8192c55a"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e2c45c208c62e955e8256949eb225bd8b66a4c9b6865729a786f2aa79b72e9d"},
- {file = "aiohttp-3.9.3-cp39-cp39-win32.whl", hash = "sha256:f7217af2e14da0856e082e96ff637f14ae45c10a5714b63c77f26d8884cf1051"},
- {file = "aiohttp-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:27468897f628c627230dba07ec65dc8d0db566923c48f29e084ce382119802bc"},
- {file = "aiohttp-3.9.3.tar.gz", hash = "sha256:90842933e5d1ff760fae6caca4b2b3edba53ba8f4b71e95dacf2818a2aca06f7"},
-]
-
-[package.dependencies]
-aiosignal = ">=1.1.2"
-async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""}
-attrs = ">=17.3.0"
-frozenlist = ">=1.1.1"
-multidict = ">=4.5,<7.0"
-yarl = ">=1.0,<2.0"
-
-[package.extras]
-speedups = ["Brotli", "aiodns", "brotlicffi"]
-
-[[package]]
-name = "aiosignal"
-version = "1.3.1"
-description = "aiosignal: a list of registered asynchronous callbacks"
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
- {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
-]
-
-[package.dependencies]
-frozenlist = ">=1.1.0"
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "annotated-types"
@@ -192,17 +54,6 @@ wrapt = [
{version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
]
-[[package]]
-name = "async-timeout"
-version = "4.0.3"
-description = "Timeout context manager for asyncio programs"
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
- {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
-]
-
[[package]]
name = "attrs"
version = "23.2.0"
@@ -495,50 +346,6 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1
[package.extras]
toml = ["tomli"]
-[[package]]
-name = "datasets"
-version = "2.16.0"
-description = "HuggingFace community-driven open-source library of datasets"
-optional = true
-python-versions = ">=3.8.0"
-files = [
- {file = "datasets-2.16.0-py3-none-any.whl", hash = "sha256:301cc39b3d81cd751100b79c85f8ae8626c17b0b113819ba2831c204d90b43f2"},
- {file = "datasets-2.16.0.tar.gz", hash = "sha256:91b06f7a8f0329179e7d603004102a6cc7a424a2f599315297a061caa1f8fa64"},
-]
-
-[package.dependencies]
-aiohttp = "*"
-dill = ">=0.3.0,<0.3.8"
-filelock = "*"
-fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]}
-huggingface-hub = ">=0.19.4"
-multiprocess = "*"
-numpy = ">=1.17"
-packaging = "*"
-pandas = "*"
-pyarrow = ">=8.0.0"
-pyarrow-hotfix = "*"
-pyyaml = ">=5.1"
-requests = ">=2.19.0"
-tqdm = ">=4.62.1"
-xxhash = "*"
-
-[package.extras]
-apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"]
-audio = ["librosa", "soundfile (>=0.12.1)"]
-benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
-dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.1.5)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
-docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"]
-jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
-metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
-quality = ["ruff (>=0.1.5)"]
-s3 = ["s3fs"]
-tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"]
-tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
-tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
-torch = ["torch"]
-vision = ["Pillow (>=6.2.1)"]
-
[[package]]
name = "dill"
version = "0.3.7"
@@ -573,17 +380,6 @@ idna = ["idna (>=2.1)"]
trio = ["trio (>=0.14)"]
wmi = ["wmi (>=1.5.1)"]
-[[package]]
-name = "einops"
-version = "0.6.1"
-description = "A new flavour of deep learning operations"
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "einops-0.6.1-py3-none-any.whl", hash = "sha256:99149e46cc808956b174932fe563d920db4d6e5dadb8c6ecdaa7483b7ef7cfc3"},
- {file = "einops-0.6.1.tar.gz", hash = "sha256:f95f8d00f4ded90dbc4b19b6f98b177332614b0357dde66997f3ae5d474dc8c8"},
-]
-
[[package]]
name = "email-validator"
version = "2.1.0.post1"
@@ -633,147 +429,6 @@ typing-extensions = ">=4.8.0"
[package.extras]
all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
-[[package]]
-name = "filelock"
-version = "3.13.1"
-description = "A platform independent file lock."
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"},
- {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"},
-]
-
-[package.extras]
-docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"]
-testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
-typing = ["typing-extensions (>=4.8)"]
-
-[[package]]
-name = "frozenlist"
-version = "1.4.1"
-description = "A list-like structure which implements collections.abc.MutableSequence"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"},
- {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"},
- {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"},
- {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"},
- {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"},
- {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"},
- {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"},
- {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"},
- {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"},
- {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"},
- {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"},
- {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"},
- {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"},
- {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"},
- {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"},
- {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"},
- {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"},
- {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"},
- {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"},
- {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"},
- {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"},
- {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"},
- {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"},
- {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"},
- {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"},
- {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"},
- {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"},
- {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"},
- {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"},
- {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"},
- {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"},
- {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"},
- {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"},
- {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"},
- {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"},
- {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"},
- {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"},
- {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"},
- {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"},
- {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"},
- {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"},
- {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"},
- {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"},
- {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"},
- {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"},
- {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"},
- {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"},
- {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"},
- {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"},
- {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"},
- {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"},
- {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"},
- {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"},
- {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"},
- {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"},
- {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"},
- {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"},
- {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"},
- {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"},
- {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"},
- {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"},
- {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"},
- {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"},
- {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"},
- {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"},
- {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"},
- {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"},
- {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"},
- {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"},
- {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"},
- {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"},
- {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"},
- {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"},
- {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"},
- {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"},
- {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"},
- {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"},
-]
-
-[[package]]
-name = "fsspec"
-version = "2023.10.0"
-description = "File-system specification"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"},
- {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"},
-]
-
-[package.dependencies]
-aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""}
-requests = {version = "*", optional = true, markers = "extra == \"http\""}
-
-[package.extras]
-abfs = ["adlfs"]
-adl = ["adlfs"]
-arrow = ["pyarrow (>=1)"]
-dask = ["dask", "distributed"]
-devel = ["pytest", "pytest-cov"]
-dropbox = ["dropbox", "dropboxdrivefs", "requests"]
-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
-fuse = ["fusepy"]
-gcs = ["gcsfs"]
-git = ["pygit2"]
-github = ["requests"]
-gs = ["gcsfs"]
-gui = ["panel"]
-hdfs = ["pyarrow (>=1)"]
-http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"]
-libarchive = ["libarchive-c"]
-oci = ["ocifs"]
-s3 = ["s3fs"]
-sftp = ["paramiko"]
-smb = ["smbprotocol"]
-ssh = ["paramiko"]
-tqdm = ["tqdm"]
-
[[package]]
name = "gql"
version = "3.5.0"
@@ -956,43 +611,6 @@ files = [
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
-[[package]]
-name = "h5py"
-version = "3.10.0"
-description = "Read and write HDF5 files from Python"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "h5py-3.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b963fb772964fc1d1563c57e4e2e874022ce11f75ddc6df1a626f42bd49ab99f"},
- {file = "h5py-3.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:012ab448590e3c4f5a8dd0f3533255bc57f80629bf7c5054cf4c87b30085063c"},
- {file = "h5py-3.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:781a24263c1270a62cd67be59f293e62b76acfcc207afa6384961762bb88ea03"},
- {file = "h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42e6c30698b520f0295d70157c4e202a9e402406f50dc08f5a7bc416b24e52d"},
- {file = "h5py-3.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:93dd840bd675787fc0b016f7a05fc6efe37312a08849d9dd4053fd0377b1357f"},
- {file = "h5py-3.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2381e98af081b6df7f6db300cd88f88e740649d77736e4b53db522d8874bf2dc"},
- {file = "h5py-3.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:667fe23ab33d5a8a6b77970b229e14ae3bb84e4ea3382cc08567a02e1499eedd"},
- {file = "h5py-3.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90286b79abd085e4e65e07c1bd7ee65a0f15818ea107f44b175d2dfe1a4674b7"},
- {file = "h5py-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c013d2e79c00f28ffd0cc24e68665ea03ae9069e167087b2adb5727d2736a52"},
- {file = "h5py-3.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:92273ce69ae4983dadb898fd4d3bea5eb90820df953b401282ee69ad648df684"},
- {file = "h5py-3.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c97d03f87f215e7759a354460fb4b0d0f27001450b18b23e556e7856a0b21c3"},
- {file = "h5py-3.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86df4c2de68257b8539a18646ceccdcf2c1ce6b1768ada16c8dcfb489eafae20"},
- {file = "h5py-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba9ab36be991119a3ff32d0c7cbe5faf9b8d2375b5278b2aea64effbeba66039"},
- {file = "h5py-3.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c8e4fda19eb769e9a678592e67eaec3a2f069f7570c82d2da909c077aa94339"},
- {file = "h5py-3.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:492305a074327e8d2513011fa9fffeb54ecb28a04ca4c4227d7e1e9616d35641"},
- {file = "h5py-3.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9450464b458cca2c86252b624279115dcaa7260a40d3cb1594bf2b410a2bd1a3"},
- {file = "h5py-3.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd6f6d1384a9f491732cee233b99cd4bfd6e838a8815cc86722f9d2ee64032af"},
- {file = "h5py-3.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3074ec45d3dc6e178c6f96834cf8108bf4a60ccb5ab044e16909580352010a97"},
- {file = "h5py-3.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:212bb997a91e6a895ce5e2f365ba764debeaef5d2dca5c6fb7098d66607adf99"},
- {file = "h5py-3.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5dfc65ac21fa2f630323c92453cadbe8d4f504726ec42f6a56cf80c2f90d6c52"},
- {file = "h5py-3.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d4682b94fd36ab217352be438abd44c8f357c5449b8995e63886b431d260f3d3"},
- {file = "h5py-3.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aece0e2e1ed2aab076c41802e50a0c3e5ef8816d60ece39107d68717d4559824"},
- {file = "h5py-3.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43a61b2c2ad65b1fabc28802d133eed34debcc2c8b420cb213d3d4ef4d3e2229"},
- {file = "h5py-3.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:ae2f0201c950059676455daf92700eeb57dcf5caaf71b9e1328e6e6593601770"},
- {file = "h5py-3.10.0.tar.gz", hash = "sha256:d93adc48ceeb33347eb24a634fb787efc7ae4644e6ea4ba733d099605045c049"},
-]
-
-[package.dependencies]
-numpy = ">=1.17.3"
-
[[package]]
name = "httpcore"
version = "0.17.3"
@@ -1037,40 +655,6 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
-[[package]]
-name = "huggingface-hub"
-version = "0.23.2"
-description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
-optional = true
-python-versions = ">=3.8.0"
-files = [
- {file = "huggingface_hub-0.23.2-py3-none-any.whl", hash = "sha256:48727a16e704d409c4bb5913613308499664f22a99743435dc3a13b23c485827"},
- {file = "huggingface_hub-0.23.2.tar.gz", hash = "sha256:f6829b62d5fdecb452a76fdbec620cba4c1573655a8d710c1df71735fd9edbd2"},
-]
-
-[package.dependencies]
-filelock = "*"
-fsspec = ">=2023.5.0"
-packaging = ">=20.9"
-pyyaml = ">=5.1"
-requests = "*"
-tqdm = ">=4.42.1"
-typing-extensions = ">=3.7.4.3"
-
-[package.extras]
-all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
-cli = ["InquirerPy (==0.3.4)"]
-dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
-fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
-hf-transfer = ["hf-transfer (>=0.1.4)"]
-inference = ["aiohttp", "minijinja (>=1.0)"]
-quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"]
-tensorflow = ["graphviz", "pydot", "tensorflow"]
-tensorflow-testing = ["keras (<3.0)", "tensorflow"]
-testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
-torch = ["safetensors", "torch"]
-typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
-
[[package]]
name = "idna"
version = "3.6"
@@ -1142,23 +726,6 @@ files = [
[package.extras]
colors = ["colorama (>=0.4.6)"]
-[[package]]
-name = "jinja2"
-version = "3.1.3"
-description = "A very fast and expressive template engine."
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
- {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
-]
-
-[package.dependencies]
-MarkupSafe = ">=2.0"
-
-[package.extras]
-i18n = ["Babel (>=2.7)"]
-
[[package]]
name = "jsonschema"
version = "4.21.1"
@@ -1243,75 +810,6 @@ files = [
{file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"},
]
-[[package]]
-name = "markupsafe"
-version = "2.1.5"
-description = "Safely add untrusted strings to HTML/XML markup."
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"},
- {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"},
- {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"},
- {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"},
- {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"},
- {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"},
- {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"},
- {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"},
-]
-
[[package]]
name = "mccabe"
version = "0.7.0"
@@ -1323,23 +821,6 @@ files = [
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
]
-[[package]]
-name = "mpmath"
-version = "1.3.0"
-description = "Python library for arbitrary-precision floating-point arithmetic"
-optional = true
-python-versions = "*"
-files = [
- {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
- {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
-]
-
-[package.extras]
-develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
-docs = ["sphinx"]
-gmpy = ["gmpy2 (>=2.1.0a4)"]
-tests = ["pytest (>=4.6)"]
-
[[package]]
name = "multidict"
version = "6.0.5"
@@ -1439,34 +920,6 @@ files = [
{file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"},
]
-[[package]]
-name = "multiprocess"
-version = "0.70.15"
-description = "better multiprocessing and multithreading in Python"
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "multiprocess-0.70.15-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:aa36c7ed16f508091438687fe9baa393a7a8e206731d321e443745e743a0d4e5"},
- {file = "multiprocess-0.70.15-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:20e024018c46d0d1602024c613007ac948f9754659e3853b0aa705e83f6931d8"},
- {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:e576062981c91f0fe8a463c3d52506e598dfc51320a8dd8d78b987dfca91c5db"},
- {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e73f497e6696a0f5433ada2b3d599ae733b87a6e8b008e387c62ac9127add177"},
- {file = "multiprocess-0.70.15-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73db2e7b32dcc7f9b0f075c2ffa45c90b6729d3f1805f27e88534c8d321a1be5"},
- {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:4271647bd8a49c28ecd6eb56a7fdbd3c212c45529ad5303b40b3c65fc6928e5f"},
- {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cf981fb998d6ec3208cb14f0cf2e9e80216e834f5d51fd09ebc937c32b960902"},
- {file = "multiprocess-0.70.15-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:18f9f2c7063346d1617bd1684fdcae8d33380ae96b99427260f562e1a1228b67"},
- {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:0eac53214d664c49a34695e5824872db4006b1a465edd7459a251809c3773370"},
- {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:1a51dd34096db47fb21fa2b839e615b051d51b97af9a67afbcdaa67186b44883"},
- {file = "multiprocess-0.70.15-py310-none-any.whl", hash = "sha256:7dd58e33235e83cf09d625e55cffd7b0f0eede7ee9223cdd666a87624f60c21a"},
- {file = "multiprocess-0.70.15-py311-none-any.whl", hash = "sha256:134f89053d82c9ed3b73edd3a2531eb791e602d4f4156fc92a79259590bd9670"},
- {file = "multiprocess-0.70.15-py37-none-any.whl", hash = "sha256:f7d4a1629bccb433114c3b4885f69eccc200994323c80f6feee73b0edc9199c5"},
- {file = "multiprocess-0.70.15-py38-none-any.whl", hash = "sha256:bee9afba476c91f9ebee7beeee0601face9eff67d822e893f9a893725fbd6316"},
- {file = "multiprocess-0.70.15-py39-none-any.whl", hash = "sha256:3e0953f5d52b4c76f1c973eaf8214554d146f2be5decb48e928e55c7a2d19338"},
- {file = "multiprocess-0.70.15.tar.gz", hash = "sha256:f20eed3036c0ef477b07a4177cf7c1ba520d9a2677870a4f47fe026f0cd6787e"},
-]
-
-[package.dependencies]
-dill = ">=0.3.7"
-
[[package]]
name = "mypy"
version = "1.8.0"
@@ -1525,203 +978,6 @@ files = [
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]
-[[package]]
-name = "networkx"
-version = "3.1"
-description = "Python package for creating and manipulating graphs and networks"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"},
- {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"},
-]
-
-[package.extras]
-default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"]
-developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"]
-doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"]
-extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"]
-test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
-
-[[package]]
-name = "numpy"
-version = "1.24.4"
-description = "Fundamental package for array computing in Python"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"},
- {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"},
- {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"},
- {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"},
- {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"},
- {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"},
- {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"},
- {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"},
- {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"},
- {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"},
- {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"},
- {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"},
- {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"},
- {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"},
- {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"},
- {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"},
- {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"},
- {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"},
- {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"},
- {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"},
- {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"},
- {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"},
- {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"},
- {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"},
- {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"},
- {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"},
- {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"},
- {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"},
-]
-
-[[package]]
-name = "nvidia-cublas-cu12"
-version = "12.1.3.1"
-description = "CUBLAS native runtime libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
- {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"},
-]
-
-[[package]]
-name = "nvidia-cuda-cupti-cu12"
-version = "12.1.105"
-description = "CUDA profiling tools runtime libs."
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
- {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"},
-]
-
-[[package]]
-name = "nvidia-cuda-nvrtc-cu12"
-version = "12.1.105"
-description = "NVRTC native runtime libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
- {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"},
-]
-
-[[package]]
-name = "nvidia-cuda-runtime-cu12"
-version = "12.1.105"
-description = "CUDA Runtime native Libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
- {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"},
-]
-
-[[package]]
-name = "nvidia-cudnn-cu12"
-version = "8.9.2.26"
-description = "cuDNN runtime libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"},
-]
-
-[package.dependencies]
-nvidia-cublas-cu12 = "*"
-
-[[package]]
-name = "nvidia-cufft-cu12"
-version = "11.0.2.54"
-description = "CUFFT native runtime libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
- {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"},
-]
-
-[[package]]
-name = "nvidia-curand-cu12"
-version = "10.3.2.106"
-description = "CURAND native runtime libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
- {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"},
-]
-
-[[package]]
-name = "nvidia-cusolver-cu12"
-version = "11.4.5.107"
-description = "CUDA solver native runtime libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
- {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"},
-]
-
-[package.dependencies]
-nvidia-cublas-cu12 = "*"
-nvidia-cusparse-cu12 = "*"
-nvidia-nvjitlink-cu12 = "*"
-
-[[package]]
-name = "nvidia-cusparse-cu12"
-version = "12.1.0.106"
-description = "CUSPARSE native runtime libraries"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
- {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"},
-]
-
-[package.dependencies]
-nvidia-nvjitlink-cu12 = "*"
-
-[[package]]
-name = "nvidia-nccl-cu12"
-version = "2.19.3"
-description = "NVIDIA Collective Communication Library (NCCL) Runtime"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d"},
-]
-
-[[package]]
-name = "nvidia-nvjitlink-cu12"
-version = "12.3.101"
-description = "Nvidia JIT LTO Library"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"},
- {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux2014_aarch64.whl", hash = "sha256:211a63e7b30a9d62f1a853e19928fbb1a750e3f17a13a3d1f98ff0ced19478dd"},
- {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"},
-]
-
-[[package]]
-name = "nvidia-nvtx-cu12"
-version = "12.1.105"
-description = "NVIDIA Tools Extension"
-optional = true
-python-versions = ">=3"
-files = [
- {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
- {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"},
-]
-
[[package]]
name = "packaging"
version = "23.2"
@@ -1733,73 +989,6 @@ files = [
{file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
]
-[[package]]
-name = "pandas"
-version = "2.0.3"
-description = "Powerful data structures for data analysis, time series, and statistics"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"},
- {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"},
- {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"},
- {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"},
- {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"},
- {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"},
- {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"},
- {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"},
- {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"},
- {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"},
- {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"},
- {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"},
- {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"},
- {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"},
- {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"},
- {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"},
- {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"},
- {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"},
- {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"},
- {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"},
- {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"},
- {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"},
- {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"},
- {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"},
- {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"},
-]
-
-[package.dependencies]
-numpy = [
- {version = ">=1.20.3", markers = "python_version < \"3.10\""},
- {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
- {version = ">=1.23.2", markers = "python_version >= \"3.11\""},
-]
-python-dateutil = ">=2.8.2"
-pytz = ">=2020.1"
-tzdata = ">=2022.1"
-
-[package.extras]
-all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"]
-aws = ["s3fs (>=2021.08.0)"]
-clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"]
-compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"]
-computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"]
-excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"]
-feather = ["pyarrow (>=7.0.0)"]
-fss = ["fsspec (>=2021.07.0)"]
-gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"]
-hdf5 = ["tables (>=3.6.1)"]
-html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"]
-mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"]
-output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"]
-parquet = ["pyarrow (>=7.0.0)"]
-performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"]
-plot = ["matplotlib (>=3.6.1)"]
-postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"]
-spss = ["pyreadstat (>=1.1.2)"]
-sql-other = ["SQLAlchemy (>=1.4.16)"]
-test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
-xml = ["lxml (>=4.6.3)"]
-
[[package]]
name = "pathspec"
version = "0.9.0"
@@ -1811,34 +1000,6 @@ files = [
{file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"},
]
-[[package]]
-name = "peft"
-version = "0.6.0"
-description = "Parameter-Efficient Fine-Tuning (PEFT)"
-optional = true
-python-versions = ">=3.8.0"
-files = [
- {file = "peft-0.6.0-py3-none-any.whl", hash = "sha256:d7fb6335beb20074f70d464aa1f2bb1ddca0875126316320a2781b04364f72a6"},
- {file = "peft-0.6.0.tar.gz", hash = "sha256:6c381208f705cd38f2cc91dc2943ac4df2615680bd75d7320d010f8f2e48e65d"},
-]
-
-[package.dependencies]
-accelerate = ">=0.21.0"
-numpy = ">=1.17"
-packaging = ">=20.0"
-psutil = "*"
-pyyaml = "*"
-safetensors = "*"
-torch = ">=1.13.0"
-tqdm = "*"
-transformers = "*"
-
-[package.extras]
-dev = ["black (>=22.0,<23.0)", "hf-doc-builder", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"]
-docs-specific = ["hf-doc-builder"]
-quality = ["black (>=22.0,<23.0)", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"]
-test = ["black (>=22.0,<23.0)", "datasets", "diffusers (<0.21.0)", "hf-doc-builder", "parameterized", "pytest", "pytest-cov", "pytest-xdist", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"]
-
[[package]]
name = "pkgutil-resolve-name"
version = "1.3.10"
@@ -1900,93 +1061,6 @@ files = [
{file = "protobuf-5.27.1.tar.gz", hash = "sha256:df5e5b8e39b7d1c25b186ffdf9f44f40f810bbcc9d2b71d9d3156fee5a9adf15"},
]
-[[package]]
-name = "psutil"
-version = "5.9.8"
-description = "Cross-platform lib for process and system monitoring in Python."
-optional = true
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
-files = [
- {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"},
- {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"},
- {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"},
- {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"},
- {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"},
- {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"},
- {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"},
- {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"},
- {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"},
- {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"},
- {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"},
- {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"},
- {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"},
- {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"},
- {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"},
- {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"},
-]
-
-[package.extras]
-test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
-
-[[package]]
-name = "pyarrow"
-version = "15.0.0"
-description = "Python library for Apache Arrow"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"},
- {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"},
- {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"},
- {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"},
- {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"},
- {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"},
- {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"},
- {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"},
- {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"},
- {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"},
- {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"},
- {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"},
- {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"},
- {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"},
- {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"},
- {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"},
- {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"},
- {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"},
- {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"},
- {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"},
- {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"},
- {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"},
- {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"},
- {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"},
- {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"},
- {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"},
- {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"},
- {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"},
- {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"},
- {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"},
- {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"},
- {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"},
- {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"},
- {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"},
- {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"},
- {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"},
-]
-
-[package.dependencies]
-numpy = ">=1.16.6,<2"
-
-[[package]]
-name = "pyarrow-hotfix"
-version = "0.6"
-description = ""
-optional = true
-python-versions = ">=3.5"
-files = [
- {file = "pyarrow_hotfix-0.6-py3-none-any.whl", hash = "sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178"},
- {file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"},
-]
-
[[package]]
name = "pydantic"
version = "2.6.1"
@@ -2216,31 +1290,6 @@ pytest = ">=4.6"
[package.extras]
testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"]
-[[package]]
-name = "python-dateutil"
-version = "2.8.2"
-description = "Extensions to the standard Python datetime module"
-optional = true
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
-files = [
- {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
- {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
-]
-
-[package.dependencies]
-six = ">=1.5"
-
-[[package]]
-name = "pytz"
-version = "2024.1"
-description = "World timezone definitions, modern and historical"
-optional = true
-python-versions = "*"
-files = [
- {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
- {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
-]
-
[[package]]
name = "pyyaml"
version = "6.0.1"
@@ -2316,108 +1365,6 @@ files = [
attrs = ">=22.2.0"
rpds-py = ">=0.7.0"
-[[package]]
-name = "regex"
-version = "2023.12.25"
-description = "Alternative regular expression module, to replace re."
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"},
- {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"},
- {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"},
- {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"},
- {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"},
- {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"},
- {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"},
- {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"},
- {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"},
- {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"},
- {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"},
- {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"},
- {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"},
- {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"},
- {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"},
- {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"},
- {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"},
- {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"},
- {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"},
- {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"},
- {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"},
- {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"},
- {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"},
- {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"},
- {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"},
- {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"},
- {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"},
- {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"},
- {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"},
- {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"},
- {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"},
- {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8a0ccf52bb37d1a700375a6b395bff5dd15c50acb745f7db30415bae3c2b0715"},
- {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c3c4a78615b7762740531c27cf46e2f388d8d727d0c0c739e72048beb26c8a9d"},
- {file = "regex-2023.12.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad83e7545b4ab69216cef4cc47e344d19622e28aabec61574b20257c65466d6a"},
- {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7a635871143661feccce3979e1727c4e094f2bdfd3ec4b90dfd4f16f571a87a"},
- {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d498eea3f581fbe1b34b59c697512a8baef88212f92e4c7830fcc1499f5b45a5"},
- {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43f7cd5754d02a56ae4ebb91b33461dc67be8e3e0153f593c509e21d219c5060"},
- {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51f4b32f793812714fd5307222a7f77e739b9bc566dc94a18126aba3b92b98a3"},
- {file = "regex-2023.12.25-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba99d8077424501b9616b43a2d208095746fb1284fc5ba490139651f971d39d9"},
- {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4bfc2b16e3ba8850e0e262467275dd4d62f0d045e0e9eda2bc65078c0110a11f"},
- {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8c2c19dae8a3eb0ea45a8448356ed561be843b13cbc34b840922ddf565498c1c"},
- {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:60080bb3d8617d96f0fb7e19796384cc2467447ef1c491694850ebd3670bc457"},
- {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b77e27b79448e34c2c51c09836033056a0547aa360c45eeeb67803da7b0eedaf"},
- {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:518440c991f514331f4850a63560321f833979d145d7d81186dbe2f19e27ae3d"},
- {file = "regex-2023.12.25-cp312-cp312-win32.whl", hash = "sha256:e2610e9406d3b0073636a3a2e80db05a02f0c3169b5632022b4e81c0364bcda5"},
- {file = "regex-2023.12.25-cp312-cp312-win_amd64.whl", hash = "sha256:cc37b9aeebab425f11f27e5e9e6cf580be7206c6582a64467a14dda211abc232"},
- {file = "regex-2023.12.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:da695d75ac97cb1cd725adac136d25ca687da4536154cdc2815f576e4da11c69"},
- {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d126361607b33c4eb7b36debc173bf25d7805847346dd4d99b5499e1fef52bc7"},
- {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4719bb05094d7d8563a450cf8738d2e1061420f79cfcc1fa7f0a44744c4d8f73"},
- {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dd58946bce44b53b06d94aa95560d0b243eb2fe64227cba50017a8d8b3cd3e2"},
- {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22a86d9fff2009302c440b9d799ef2fe322416d2d58fc124b926aa89365ec482"},
- {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2aae8101919e8aa05ecfe6322b278f41ce2994c4a430303c4cd163fef746e04f"},
- {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e692296c4cc2873967771345a876bcfc1c547e8dd695c6b89342488b0ea55cd8"},
- {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:263ef5cc10979837f243950637fffb06e8daed7f1ac1e39d5910fd29929e489a"},
- {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d6f7e255e5fa94642a0724e35406e6cb7001c09d476ab5fce002f652b36d0c39"},
- {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:88ad44e220e22b63b0f8f81f007e8abbb92874d8ced66f32571ef8beb0643b2b"},
- {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3a17d3ede18f9cedcbe23d2daa8a2cd6f59fe2bf082c567e43083bba3fb00347"},
- {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d15b274f9e15b1a0b7a45d2ac86d1f634d983ca40d6b886721626c47a400bf39"},
- {file = "regex-2023.12.25-cp37-cp37m-win32.whl", hash = "sha256:ed19b3a05ae0c97dd8f75a5d8f21f7723a8c33bbc555da6bbe1f96c470139d3c"},
- {file = "regex-2023.12.25-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d1047952c0b8104a1d371f88f4ab62e6275567d4458c1e26e9627ad489b445"},
- {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b43523d7bc2abd757119dbfb38af91b5735eea45537ec6ec3a5ec3f9562a1c53"},
- {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:efb2d82f33b2212898f1659fb1c2e9ac30493ac41e4d53123da374c3b5541e64"},
- {file = "regex-2023.12.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7fca9205b59c1a3d5031f7e64ed627a1074730a51c2a80e97653e3e9fa0d415"},
- {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086dd15e9435b393ae06f96ab69ab2d333f5d65cbe65ca5a3ef0ec9564dfe770"},
- {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e81469f7d01efed9b53740aedd26085f20d49da65f9c1f41e822a33992cb1590"},
- {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34e4af5b27232f68042aa40a91c3b9bb4da0eeb31b7632e0091afc4310afe6cb"},
- {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9852b76ab558e45b20bf1893b59af64a28bd3820b0c2efc80e0a70a4a3ea51c1"},
- {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff100b203092af77d1a5a7abe085b3506b7eaaf9abf65b73b7d6905b6cb76988"},
- {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cc038b2d8b1470364b1888a98fd22d616fba2b6309c5b5f181ad4483e0017861"},
- {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:094ba386bb5c01e54e14434d4caabf6583334090865b23ef58e0424a6286d3dc"},
- {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5cd05d0f57846d8ba4b71d9c00f6f37d6b97d5e5ef8b3c3840426a475c8f70f4"},
- {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:9aa1a67bbf0f957bbe096375887b2505f5d8ae16bf04488e8b0f334c36e31360"},
- {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:98a2636994f943b871786c9e82bfe7883ecdaba2ef5df54e1450fa9869d1f756"},
- {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37f8e93a81fc5e5bd8db7e10e62dc64261bcd88f8d7e6640aaebe9bc180d9ce2"},
- {file = "regex-2023.12.25-cp38-cp38-win32.whl", hash = "sha256:d78bd484930c1da2b9679290a41cdb25cc127d783768a0369d6b449e72f88beb"},
- {file = "regex-2023.12.25-cp38-cp38-win_amd64.whl", hash = "sha256:b521dcecebc5b978b447f0f69b5b7f3840eac454862270406a39837ffae4e697"},
- {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"},
- {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"},
- {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"},
- {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"},
- {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"},
- {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"},
- {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"},
- {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"},
- {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"},
- {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"},
- {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"},
- {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"},
- {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"},
- {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"},
- {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"},
- {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"},
- {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"},
-]
-
[[package]]
name = "requests"
version = "2.31.0"
@@ -2585,125 +1532,6 @@ files = [
{file = "rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"},
]
-[[package]]
-name = "safetensors"
-version = "0.4.1"
-description = ""
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "safetensors-0.4.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:cba01c6b76e01ec453933b3b3c0157c59b52881c83eaa0f7666244e71aa75fd1"},
- {file = "safetensors-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a8f6f679d97ea0135c7935c202feefbd042c149aa70ee759855e890c01c7814"},
- {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc2ce1f5ae5143a7fb72b71fa71db6a42b4f6cf912aa3acdc6b914084778e68"},
- {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2d87d993eaefe6611a9c241a8bd364a5f1ffed5771c74840363a6c4ed8d868f6"},
- {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:097e9af2efa8778cd2f0cba451784253e62fa7cc9fc73c0744d27212f7294e25"},
- {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d10a9f7bae608ccfdc009351f01dc3d8535ff57f9488a58a4c38e45bf954fe93"},
- {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:270b99885ec14abfd56c1d7f28ada81740a9220b4bae960c3de1c6fe84af9e4d"},
- {file = "safetensors-0.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:285b52a481e7ba93e29ad4ec5841ef2c4479ef0a6c633c4e2629e0508453577b"},
- {file = "safetensors-0.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c3c9f0ca510e0de95abd6424789dcbc879942a3a4e29b0dfa99d9427bf1da75c"},
- {file = "safetensors-0.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:88b4653059c903015284a9722f9a46838c654257173b279c8f6f46dbe80b612d"},
- {file = "safetensors-0.4.1-cp310-none-win32.whl", hash = "sha256:2fe6926110e3d425c4b684a4379b7796fdc26ad7d16922ea1696c8e6ea7e920f"},
- {file = "safetensors-0.4.1-cp310-none-win_amd64.whl", hash = "sha256:a79e16222106b2f5edbca1b8185661477d8971b659a3c814cc6f15181a9b34c8"},
- {file = "safetensors-0.4.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:d93321eea0dd7e81b283e47a1d20dee6069165cc158286316d0d06d340de8fe8"},
- {file = "safetensors-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ff8e41c8037db17de0ea2a23bc684f43eaf623be7d34906fe1ac10985b8365e"},
- {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39d36f1d88468a87c437a1bc27c502e71b6ca44c385a9117a9f9ba03a75cc9c6"},
- {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7ef010e9afcb4057fb6be3d0a0cfa07aac04fe97ef73fe4a23138d8522ba7c17"},
- {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b287304f2b2220d51ccb51fd857761e78bcffbeabe7b0238f8dc36f2edfd9542"},
- {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e09000b2599e1836314430f81a3884c66a5cbabdff5d9f175b5d560d4de38d78"},
- {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9c80ce0001efa16066358d2dd77993adc25f5a6c61850e4ad096a2232930bce"},
- {file = "safetensors-0.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:413e1f6ac248f7d1b755199a06635e70c3515493d3b41ba46063dec33aa2ebb7"},
- {file = "safetensors-0.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3ac139377cfe71ba04573f1cda66e663b7c3e95be850e9e6c2dd4b5984bd513"},
- {file = "safetensors-0.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:04157d008385bea66d12fe90844a80d4a76dc25ec5230b5bd9a630496d1b7c03"},
- {file = "safetensors-0.4.1-cp311-none-win32.whl", hash = "sha256:5f25297148ec665f0deb8bd67e9564634d8d6841041ab5393ccfe203379ea88b"},
- {file = "safetensors-0.4.1-cp311-none-win_amd64.whl", hash = "sha256:b2f8877990a72ff595507b80f4b69036a9a1986a641f8681adf3425d97d3d2a5"},
- {file = "safetensors-0.4.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:eb2c1da1cc39509d1a55620a5f4d14f8911c47a89c926a96e6f4876e864375a3"},
- {file = "safetensors-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:303d2c0415cf15a28f8d7f17379ea3c34c2b466119118a34edd9965983a1a8a6"},
- {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb4cb3e37a9b961ddd68e873b29fe9ab4a081e3703412e34aedd2b7a8e9cafd9"},
- {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae5497adc68669db2fed7cb2dad81e6a6106e79c9a132da3efdb6af1db1014fa"},
- {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b30abd0cddfe959d1daedf92edcd1b445521ebf7ddefc20860ed01486b33c90"},
- {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d784a98c492c751f228a4a894c3b8a092ff08b24e73b5568938c28b8c0e8f8df"},
- {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e57a5ab08b0ec7a7caf30d2ac79bb30c89168431aca4f8854464bb9461686925"},
- {file = "safetensors-0.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:edcf3121890b5f0616aa5a54683b1a5d2332037b970e507d6bb7841a3a596556"},
- {file = "safetensors-0.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fdb58dee173ef33634c3016c459d671ca12d11e6acf9db008261cbe58107e579"},
- {file = "safetensors-0.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:780dc21eb3fd32ddd0e8c904bdb0290f2454f4ac21ae71e94f9ce72db1900a5a"},
- {file = "safetensors-0.4.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:48901bd540f8a3c1791314bc5c8a170927bf7f6acddb75bf0a263d081a3637d4"},
- {file = "safetensors-0.4.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:3b0b7b2d5976fbed8a05e2bbdce5816a59e6902e9e7c7e07dc723637ed539787"},
- {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f69903ff49cb30b9227fb5d029bea276ea20d04b06803877a420c5b1b74c689"},
- {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0ddd050e01f3e843aa8c1c27bf68675b8a08e385d0045487af4d70418c3cb356"},
- {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a82bc2bd7a9a0e08239bdd6d7774d64121f136add93dfa344a2f1a6d7ef35fa"},
- {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ace9e66a40f98a216ad661245782483cf79cf56eb2b112650bb904b0baa9db5"},
- {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82cbb8f4d022f2e94498cbefca900698b8ded3d4f85212f47da614001ff06652"},
- {file = "safetensors-0.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:791edc10a3c359a2f5f52d5cddab0df8a45107d91027d86c3d44e57162e5d934"},
- {file = "safetensors-0.4.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:83c2cfbe8c6304f0891e7bb378d56f66d2148972eeb5f747cd8a2246886f0d8c"},
- {file = "safetensors-0.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:04dd14f53f5500eb4c4149674216ba1000670efbcf4b1b5c2643eb244e7882ea"},
- {file = "safetensors-0.4.1-cp37-none-win32.whl", hash = "sha256:d5b3defa74f3723a388bfde2f5d488742bc4879682bd93267c09a3bcdf8f869b"},
- {file = "safetensors-0.4.1-cp37-none-win_amd64.whl", hash = "sha256:25a043cbb59d4f75e9dd87fdf5c009dd8830105a2c57ace49b72167dd9808111"},
- {file = "safetensors-0.4.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:3f6a520af7f2717c5ecba112041f2c8af1ca6480b97bf957aba81ed9642e654c"},
- {file = "safetensors-0.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c3807ac3b16288dffebb3474b555b56fe466baa677dfc16290dcd02dca1ab228"},
- {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b58ba13a9e82b4bc3fc221914f6ef237fe6c2adb13cede3ace64d1aacf49610"},
- {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dac4bb42f8679aadc59bd91a4c5a1784a758ad49d0912995945cd674089f628e"},
- {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:911b48dc09e321a194def3a7431662ff4f03646832f3a8915bbf0f449b8a5fcb"},
- {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82571d20288c975c1b30b08deb9b1c3550f36b31191e1e81fae87669a92217d0"},
- {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da52ee0dc8ba03348ffceab767bd8230842fdf78f8a996e2a16445747143a778"},
- {file = "safetensors-0.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2536b11ce665834201072e9397404170f93f3be10cca9995b909f023a04501ee"},
- {file = "safetensors-0.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:998fbac99ca956c3a09fe07cc0b35fac26a521fa8865a690686d889f0ff4e4a6"},
- {file = "safetensors-0.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:845be0aafabf2a60c2d482d4e93023fecffe5e5443d801d7a7741bae9de41233"},
- {file = "safetensors-0.4.1-cp38-none-win32.whl", hash = "sha256:ce7a28bc8af685a69d7e869d09d3e180a275e3281e29cf5f1c7319e231932cc7"},
- {file = "safetensors-0.4.1-cp38-none-win_amd64.whl", hash = "sha256:e056fb9e22d118cc546107f97dc28b449d88274207dd28872bd668c86216e4f6"},
- {file = "safetensors-0.4.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:bdc0d039e44a727824639824090bd8869535f729878fa248addd3dc01db30eae"},
- {file = "safetensors-0.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c1b1d510c7aba71504ece87bf393ea82638df56303e371e5e2cf09d18977dd7"},
- {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bd0afd95c1e497f520e680ea01e0397c0868a3a3030e128438cf6e9e3fcd671"},
- {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f603bdd8deac6726d39f41688ed353c532dd53935234405d79e9eb53f152fbfb"},
- {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8a85e3e47e0d4eebfaf9a58b40aa94f977a56050cb5598ad5396a9ee7c087c6"},
- {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0ccb5aa0f3be2727117e5631200fbb3a5b3a2b3757545a92647d6dd8be6658f"},
- {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d784938534e255473155e4d9f276ee69eb85455b6af1292172c731409bf9adee"},
- {file = "safetensors-0.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a257de175c254d39ccd6a21341cd62eb7373b05c1e618a78096a56a857e0c316"},
- {file = "safetensors-0.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6fd80f7794554091836d4d613d33a7d006e2b8d6ba014d06f97cebdfda744f64"},
- {file = "safetensors-0.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:35803201d980efcf964b75a0a2aee97fe5e9ecc5f3ad676b38fafdfe98e0620d"},
- {file = "safetensors-0.4.1-cp39-none-win32.whl", hash = "sha256:7ff8a36e0396776d3ed9a106fc9a9d7c55d4439ca9a056a24bf66d343041d3e6"},
- {file = "safetensors-0.4.1-cp39-none-win_amd64.whl", hash = "sha256:bfa2e20342b81921b98edba52f8deb68843fa9c95250739a56b52ceda5ea5c61"},
- {file = "safetensors-0.4.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ae2d5a31cfb8a973a318f7c4d2cffe0bd1fe753cdf7bb41a1939d45a0a06f964"},
- {file = "safetensors-0.4.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1a45dbf03e8334d3a5dc93687d98b6dc422f5d04c7d519dac09b84a3c87dd7c6"},
- {file = "safetensors-0.4.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2297b359d91126c0f9d4fd17bae3cfa2fe3a048a6971b8db07db746ad92f850c"},
- {file = "safetensors-0.4.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bda3d98e2bcece388232cfc551ebf063b55bdb98f65ab54df397da30efc7dcc5"},
- {file = "safetensors-0.4.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8934bdfd202ebd0697040a3dff40dd77bc4c5bbf3527ede0532f5e7fb4d970f"},
- {file = "safetensors-0.4.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:42c3710cec7e5c764c7999697516370bee39067de0aa089b7e2cfb97ac8c6b20"},
- {file = "safetensors-0.4.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:53134226053e56bd56e73f7db42596e7908ed79f3c9a1016e4c1dade593ac8e5"},
- {file = "safetensors-0.4.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:257d59e40a1b367cb544122e7451243d65b33c3f34d822a347f4eea6fdf97fdf"},
- {file = "safetensors-0.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d54c2f1826e790d1eb2d2512bfd0ee443f0206b423d6f27095057c7f18a0687"},
- {file = "safetensors-0.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:645b3f1138fce6e818e79d4128afa28f0657430764cc045419c1d069ff93f732"},
- {file = "safetensors-0.4.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e9a7ffb1e551c6df51d267f5a751f042b183df22690f6feceac8d27364fd51d7"},
- {file = "safetensors-0.4.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:44e230fbbe120de564b64f63ef3a8e6ff02840fa02849d9c443d56252a1646d4"},
- {file = "safetensors-0.4.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9d16b3b2fcc6fca012c74bd01b5619c655194d3e3c13e4d4d0e446eefa39a463"},
- {file = "safetensors-0.4.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:5d95ea4d8b32233910734a904123bdd3979c137c461b905a5ed32511defc075f"},
- {file = "safetensors-0.4.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:dab431699b5d45e0ca043bc580651ce9583dda594e62e245b7497adb32e99809"},
- {file = "safetensors-0.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16d8bbb7344e39cb9d4762e85c21df94ebeb03edac923dd94bb9ed8c10eac070"},
- {file = "safetensors-0.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1faf5111c66a6ba91f85dff2e36edaaf36e6966172703159daeef330de4ddc7b"},
- {file = "safetensors-0.4.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:660ca1d8bff6c7bc7c6b30b9b32df74ef3ab668f5df42cefd7588f0d40feadcb"},
- {file = "safetensors-0.4.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ae2f67f04ed0bb2e56fd380a8bd3eef03f609df53f88b6f5c7e89c08e52aae00"},
- {file = "safetensors-0.4.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c8ed5d2c04cdc1afc6b3c28d59580448ac07732c50d94c15e14670f9c473a2ce"},
- {file = "safetensors-0.4.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:2b6a2814278b6660261aa9a9aae524616de9f1ec364e3716d219b6ed8f91801f"},
- {file = "safetensors-0.4.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3cfd1ca35eacc635f0eaa894e5c5ed83ffebd0f95cac298fd430014fa7323631"},
- {file = "safetensors-0.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4177b456c6b0c722d82429127b5beebdaf07149d265748e97e0a34ff0b3694c8"},
- {file = "safetensors-0.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:313e8472197bde54e3ec54a62df184c414582979da8f3916981b6a7954910a1b"},
- {file = "safetensors-0.4.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fdb4adb76e21bad318210310590de61c9f4adcef77ee49b4a234f9dc48867869"},
- {file = "safetensors-0.4.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1d568628e9c43ca15eb96c217da73737c9ccb07520fafd8a1eba3f2750614105"},
- {file = "safetensors-0.4.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:573b6023a55a2f28085fc0a84e196c779b6cbef4d9e73acea14c8094fee7686f"},
- {file = "safetensors-0.4.1.tar.gz", hash = "sha256:2304658e6ada81a5223225b4efe84748e760c46079bffedf7e321763cafb36c9"},
-]
-
-[package.extras]
-all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"]
-dev = ["safetensors[all]"]
-jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"]
-numpy = ["numpy (>=1.21.6)"]
-paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"]
-pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"]
-quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"]
-tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"]
-testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"]
-torch = ["safetensors[numpy]", "torch (>=1.10)"]
-
[[package]]
name = "setuptools"
version = "70.0.0"
@@ -2770,137 +1598,6 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""
[package.extras]
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
-[[package]]
-name = "sympy"
-version = "1.12"
-description = "Computer algebra system (CAS) in Python"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
- {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
-]
-
-[package.dependencies]
-mpmath = ">=0.19"
-
-[[package]]
-name = "tokenizers"
-version = "0.19.1"
-description = ""
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"},
- {file = "tokenizers-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82c8b8063de6c0468f08e82c4e198763e7b97aabfe573fd4cf7b33930ca4df77"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f03727225feaf340ceeb7e00604825addef622d551cbd46b7b775ac834c1e1c4"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:453e4422efdfc9c6b6bf2eae00d5e323f263fff62b29a8c9cd526c5003f3f642"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02e81bf089ebf0e7f4df34fa0207519f07e66d8491d963618252f2e0729e0b46"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b07c538ba956843833fee1190cf769c60dc62e1cf934ed50d77d5502194d63b1"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28cab1582e0eec38b1f38c1c1fb2e56bce5dc180acb1724574fc5f47da2a4fe"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b01afb7193d47439f091cd8f070a1ced347ad0f9144952a30a41836902fe09e"},
- {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7fb297edec6c6841ab2e4e8f357209519188e4a59b557ea4fafcf4691d1b4c98"},
- {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2e8a3dd055e515df7054378dc9d6fa8c8c34e1f32777fb9a01fea81496b3f9d3"},
- {file = "tokenizers-0.19.1-cp310-none-win32.whl", hash = "sha256:7ff898780a155ea053f5d934925f3902be2ed1f4d916461e1a93019cc7250837"},
- {file = "tokenizers-0.19.1-cp310-none-win_amd64.whl", hash = "sha256:bea6f9947e9419c2fda21ae6c32871e3d398cba549b93f4a65a2d369662d9403"},
- {file = "tokenizers-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5c88d1481f1882c2e53e6bb06491e474e420d9ac7bdff172610c4f9ad3898059"},
- {file = "tokenizers-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddf672ed719b4ed82b51499100f5417d7d9f6fb05a65e232249268f35de5ed14"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dadc509cc8a9fe460bd274c0e16ac4184d0958117cf026e0ea8b32b438171594"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfedf31824ca4915b511b03441784ff640378191918264268e6923da48104acc"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac11016d0a04aa6487b1513a3a36e7bee7eec0e5d30057c9c0408067345c48d2"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76951121890fea8330d3a0df9a954b3f2a37e3ec20e5b0530e9a0044ca2e11fe"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b342d2ce8fc8d00f376af068e3274e2e8649562e3bc6ae4a67784ded6b99428d"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d16ff18907f4909dca9b076b9c2d899114dd6abceeb074eca0c93e2353f943aa"},
- {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:706a37cc5332f85f26efbe2bdc9ef8a9b372b77e4645331a405073e4b3a8c1c6"},
- {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16baac68651701364b0289979ecec728546133e8e8fe38f66fe48ad07996b88b"},
- {file = "tokenizers-0.19.1-cp311-none-win32.whl", hash = "sha256:9ed240c56b4403e22b9584ee37d87b8bfa14865134e3e1c3fb4b2c42fafd3256"},
- {file = "tokenizers-0.19.1-cp311-none-win_amd64.whl", hash = "sha256:ad57d59341710b94a7d9dbea13f5c1e7d76fd8d9bcd944a7a6ab0b0da6e0cc66"},
- {file = "tokenizers-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:621d670e1b1c281a1c9698ed89451395d318802ff88d1fc1accff0867a06f153"},
- {file = "tokenizers-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d924204a3dbe50b75630bd16f821ebda6a5f729928df30f582fb5aade90c818a"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f3fefdc0446b1a1e6d81cd4c07088ac015665d2e812f6dbba4a06267d1a2c95"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9620b78e0b2d52ef07b0d428323fb34e8ea1219c5eac98c2596311f20f1f9266"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04ce49e82d100594715ac1b2ce87d1a36e61891a91de774755f743babcd0dd52"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5c2ff13d157afe413bf7e25789879dd463e5a4abfb529a2d8f8473d8042e28f"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3174c76efd9d08f836bfccaca7cfec3f4d1c0a4cf3acbc7236ad577cc423c840"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9d5b6c0e7a1e979bec10ff960fae925e947aab95619a6fdb4c1d8ff3708ce3"},
- {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a179856d1caee06577220ebcfa332af046d576fb73454b8f4d4b0ba8324423ea"},
- {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:952b80dac1a6492170f8c2429bd11fcaa14377e097d12a1dbe0ef2fb2241e16c"},
- {file = "tokenizers-0.19.1-cp312-none-win32.whl", hash = "sha256:01d62812454c188306755c94755465505836fd616f75067abcae529c35edeb57"},
- {file = "tokenizers-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:b70bfbe3a82d3e3fb2a5e9b22a39f8d1740c96c68b6ace0086b39074f08ab89a"},
- {file = "tokenizers-0.19.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:bb9dfe7dae85bc6119d705a76dc068c062b8b575abe3595e3c6276480e67e3f1"},
- {file = "tokenizers-0.19.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:1f0360cbea28ea99944ac089c00de7b2e3e1c58f479fb8613b6d8d511ce98267"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:71e3ec71f0e78780851fef28c2a9babe20270404c921b756d7c532d280349214"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b82931fa619dbad979c0ee8e54dd5278acc418209cc897e42fac041f5366d626"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8ff5b90eabdcdaa19af697885f70fe0b714ce16709cf43d4952f1f85299e73a"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e742d76ad84acbdb1a8e4694f915fe59ff6edc381c97d6dfdd054954e3478ad4"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8c5d59d7b59885eab559d5bc082b2985555a54cda04dda4c65528d90ad252ad"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b2da5c32ed869bebd990c9420df49813709e953674c0722ff471a116d97b22d"},
- {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:638e43936cc8b2cbb9f9d8dde0fe5e7e30766a3318d2342999ae27f68fdc9bd6"},
- {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:78e769eb3b2c79687d9cb0f89ef77223e8e279b75c0a968e637ca7043a84463f"},
- {file = "tokenizers-0.19.1-cp37-none-win32.whl", hash = "sha256:72791f9bb1ca78e3ae525d4782e85272c63faaef9940d92142aa3eb79f3407a3"},
- {file = "tokenizers-0.19.1-cp37-none-win_amd64.whl", hash = "sha256:f3bbb7a0c5fcb692950b041ae11067ac54826204318922da754f908d95619fbc"},
- {file = "tokenizers-0.19.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:07f9295349bbbcedae8cefdbcfa7f686aa420be8aca5d4f7d1ae6016c128c0c5"},
- {file = "tokenizers-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10a707cc6c4b6b183ec5dbfc5c34f3064e18cf62b4a938cb41699e33a99e03c1"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6309271f57b397aa0aff0cbbe632ca9d70430839ca3178bf0f06f825924eca22"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad23d37d68cf00d54af184586d79b84075ada495e7c5c0f601f051b162112dc"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:427c4f0f3df9109314d4f75b8d1f65d9477033e67ffaec4bca53293d3aca286d"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e83a31c9cf181a0a3ef0abad2b5f6b43399faf5da7e696196ddd110d332519ee"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c27b99889bd58b7e301468c0838c5ed75e60c66df0d4db80c08f43462f82e0d3"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bac0b0eb952412b0b196ca7a40e7dce4ed6f6926489313414010f2e6b9ec2adf"},
- {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6298bde623725ca31c9035a04bf2ef63208d266acd2bed8c2cb7d2b7d53ce6"},
- {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:08a44864e42fa6d7d76d7be4bec62c9982f6f6248b4aa42f7302aa01e0abfd26"},
- {file = "tokenizers-0.19.1-cp38-none-win32.whl", hash = "sha256:1de5bc8652252d9357a666e609cb1453d4f8e160eb1fb2830ee369dd658e8975"},
- {file = "tokenizers-0.19.1-cp38-none-win_amd64.whl", hash = "sha256:0bcce02bf1ad9882345b34d5bd25ed4949a480cf0e656bbd468f4d8986f7a3f1"},
- {file = "tokenizers-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0b9394bd204842a2a1fd37fe29935353742be4a3460b6ccbaefa93f58a8df43d"},
- {file = "tokenizers-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4692ab92f91b87769d950ca14dbb61f8a9ef36a62f94bad6c82cc84a51f76f6a"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6258c2ef6f06259f70a682491c78561d492e885adeaf9f64f5389f78aa49a051"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c85cf76561fbd01e0d9ea2d1cbe711a65400092bc52b5242b16cfd22e51f0c58"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670b802d4d82bbbb832ddb0d41df7015b3e549714c0e77f9bed3e74d42400fbe"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85aa3ab4b03d5e99fdd31660872249df5e855334b6c333e0bc13032ff4469c4a"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbf001afbbed111a79ca47d75941e9e5361297a87d186cbfc11ed45e30b5daba"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c89aa46c269e4e70c4d4f9d6bc644fcc39bb409cb2a81227923404dd6f5227"},
- {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:39c1ec76ea1027438fafe16ecb0fb84795e62e9d643444c1090179e63808c69d"},
- {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c2a0d47a89b48d7daa241e004e71fb5a50533718897a4cd6235cb846d511a478"},
- {file = "tokenizers-0.19.1-cp39-none-win32.whl", hash = "sha256:61b7fe8886f2e104d4caf9218b157b106207e0f2a4905c9c7ac98890688aabeb"},
- {file = "tokenizers-0.19.1-cp39-none-win_amd64.whl", hash = "sha256:f97660f6c43efd3e0bfd3f2e3e5615bf215680bad6ee3d469df6454b8c6e8256"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b11853f17b54c2fe47742c56d8a33bf49ce31caf531e87ac0d7d13d327c9334"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d26194ef6c13302f446d39972aaa36a1dda6450bc8949f5eb4c27f51191375bd"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e8d1ed93beda54bbd6131a2cb363a576eac746d5c26ba5b7556bc6f964425594"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca407133536f19bdec44b3da117ef0d12e43f6d4b56ac4c765f37eca501c7bda"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce05fde79d2bc2e46ac08aacbc142bead21614d937aac950be88dc79f9db9022"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:35583cd46d16f07c054efd18b5d46af4a2f070a2dd0a47914e66f3ff5efb2b1e"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b4399b59d1af5645bcee2072a463318114c39b8547437a7c2d6a186a1b5a0e2d"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6852c5b2a853b8b0ddc5993cd4f33bfffdca4fcc5d52f89dd4b8eada99379285"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd266ae85c3d39df2f7e7d0e07f6c41a55e9a3123bb11f854412952deacd828"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecb2651956eea2aa0a2d099434134b1b68f1c31f9a5084d6d53f08ed43d45ff2"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b279ab506ec4445166ac476fb4d3cc383accde1ea152998509a94d82547c8e2a"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:89183e55fb86e61d848ff83753f64cded119f5d6e1f553d14ffee3700d0a4a49"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2edbc75744235eea94d595a8b70fe279dd42f3296f76d5a86dde1d46e35f574"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:0e64bfde9a723274e9a71630c3e9494ed7b4c0f76a1faacf7fe294cd26f7ae7c"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b5ca92bfa717759c052e345770792d02d1f43b06f9e790ca0a1db62838816f3"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f8a20266e695ec9d7a946a019c1d5ca4eddb6613d4f466888eee04f16eedb85"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c38f45d8f2a2ec0f3a20073cccb335b9f99f73b3c69483cd52ebc75369d8a1"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dd26e3afe8a7b61422df3176e06664503d3f5973b94f45d5c45987e1cb711876"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:eddd5783a4a6309ce23432353cdb36220e25cbb779bfa9122320666508b44b88"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:56ae39d4036b753994476a1b935584071093b55c7a72e3b8288e68c313ca26e7"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f9939ca7e58c2758c01b40324a59c034ce0cebad18e0d4563a9b1beab3018243"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c330c0eb815d212893c67a032e9dc1b38a803eccb32f3e8172c19cc69fbb439"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec11802450a2487cdf0e634b750a04cbdc1c4d066b97d94ce7dd2cb51ebb325b"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b718f316b596f36e1dae097a7d5b91fc5b85e90bf08b01ff139bd8953b25af"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ed69af290c2b65169f0ba9034d1dc39a5db9459b32f1dd8b5f3f32a3fcf06eab"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f8a9c828277133af13f3859d1b6bf1c3cb6e9e1637df0e45312e6b7c2e622b1f"},
- {file = "tokenizers-0.19.1.tar.gz", hash = "sha256:ee59e6680ed0fdbe6b724cf38bd70400a0c1dd623b07ac729087270caeac88e3"},
-]
-
-[package.dependencies]
-huggingface-hub = ">=0.16.4,<1.0"
-
-[package.extras]
-dev = ["tokenizers[testing]"]
-docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"]
-testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"]
-
[[package]]
name = "toml"
version = "0.10.2"
@@ -2934,64 +1631,6 @@ files = [
{file = "tomlkit-0.12.3.tar.gz", hash = "sha256:75baf5012d06501f07bee5bf8e801b9f343e7aac5a92581f20f80ce632e6b5a4"},
]
-[[package]]
-name = "torch"
-version = "2.2.0"
-description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
-optional = true
-python-versions = ">=3.8.0"
-files = [
- {file = "torch-2.2.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d366158d6503a3447e67f8c0ad1328d54e6c181d88572d688a625fac61b13a97"},
- {file = "torch-2.2.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:707f2f80402981e9f90d0038d7d481678586251e6642a7a6ef67fc93511cb446"},
- {file = "torch-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:15c8f0a105c66b28496092fca1520346082e734095f8eaf47b5786bac24b8a31"},
- {file = "torch-2.2.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:0ca4df4b728515ad009b79f5107b00bcb2c63dc202d991412b9eb3b6a4f24349"},
- {file = "torch-2.2.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:3d3eea2d5969b9a1c9401429ca79efc668120314d443d3463edc3289d7f003c7"},
- {file = "torch-2.2.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0d1c580e379c0d48f0f0a08ea28d8e373295aa254de4f9ad0631f9ed8bc04c24"},
- {file = "torch-2.2.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9328e3c1ce628a281d2707526b4d1080eae7c4afab4f81cea75bde1f9441dc78"},
- {file = "torch-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:03c8e660907ac1b8ee07f6d929c4e15cd95be2fb764368799cca02c725a212b8"},
- {file = "torch-2.2.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:da0cefe7f84ece3e3b56c11c773b59d1cb2c0fd83ddf6b5f7f1fd1a987b15c3e"},
- {file = "torch-2.2.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f81d23227034221a4a4ff8ef24cc6cec7901edd98d9e64e32822778ff01be85e"},
- {file = "torch-2.2.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:dcbfb2192ac41ca93c756ebe9e2af29df0a4c14ee0e7a0dd78f82c67a63d91d4"},
- {file = "torch-2.2.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:9eeb42971619e24392c9088b5b6d387d896e267889d41d267b1fec334f5227c5"},
- {file = "torch-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:c718b2ca69a6cac28baa36d86d8c0ec708b102cebd1ceb1b6488e404cd9be1d1"},
- {file = "torch-2.2.0-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:f11d18fceb4f9ecb1ac680dde7c463c120ed29056225d75469c19637e9f98d12"},
- {file = "torch-2.2.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:ee1da852bfd4a7e674135a446d6074c2da7194c1b08549e31eae0b3138c6b4d2"},
- {file = "torch-2.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0d819399819d0862268ac531cf12a501c253007df4f9e6709ede8a0148f1a7b8"},
- {file = "torch-2.2.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:08f53ccc38c49d839bc703ea1b20769cc8a429e0c4b20b56921a9f64949bf325"},
- {file = "torch-2.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:93bffe3779965a71dab25fc29787538c37c5d54298fd2f2369e372b6fb137d41"},
- {file = "torch-2.2.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:c17ec323da778efe8dad49d8fb534381479ca37af1bfc58efdbb8607a9d263a3"},
- {file = "torch-2.2.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c02685118008834e878f676f81eab3a952b7936fa31f474ef8a5ff4b5c78b36d"},
- {file = "torch-2.2.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:d9f39d6f53cec240a0e3baa82cb697593340f9d4554cee6d3d6ca07925c2fac0"},
- {file = "torch-2.2.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:51770c065206250dc1222ea7c0eff3f88ab317d3e931cca2aee461b85fbc2472"},
- {file = "torch-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:008e4c6ad703de55af760c73bf937ecdd61a109f9b08f2bbb9c17e7c7017f194"},
- {file = "torch-2.2.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:de8680472dd14e316f42ceef2a18a301461a9058cd6e99a1f1b20f78f11412f1"},
- {file = "torch-2.2.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:99e1dcecb488e3fd25bcaac56e48cdb3539842904bdc8588b0b255fde03a254c"},
-]
-
-[package.dependencies]
-filelock = "*"
-fsspec = "*"
-jinja2 = "*"
-networkx = "*"
-nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nccl-cu12 = {version = "2.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-sympy = "*"
-triton = {version = "2.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-typing-extensions = ">=4.8.0"
-
-[package.extras]
-opt-einsum = ["opt-einsum (>=3.3)"]
-optree = ["optree (>=0.9.1)"]
-
[[package]]
name = "tqdm"
version = "4.66.2"
@@ -3012,95 +1651,6 @@ notebook = ["ipywidgets (>=6)"]
slack = ["slack-sdk"]
telegram = ["requests"]
-[[package]]
-name = "transformers"
-version = "4.41.2"
-description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
-optional = true
-python-versions = ">=3.8.0"
-files = [
- {file = "transformers-4.41.2-py3-none-any.whl", hash = "sha256:05555d20e43f808de1ef211ab64803cdb513170cef70d29a888b589caebefc67"},
- {file = "transformers-4.41.2.tar.gz", hash = "sha256:80a4db216533d573e9cc7388646c31ed9480918feb7c55eb211249cb23567f87"},
-]
-
-[package.dependencies]
-filelock = "*"
-huggingface-hub = ">=0.23.0,<1.0"
-numpy = ">=1.17"
-packaging = ">=20.0"
-pyyaml = ">=5.1"
-regex = "!=2019.12.17"
-requests = "*"
-safetensors = ">=0.4.1"
-tokenizers = ">=0.19,<0.20"
-tqdm = ">=4.27"
-
-[package.extras]
-accelerate = ["accelerate (>=0.21.0)"]
-agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"]
-all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision"]
-audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
-codecarbon = ["codecarbon (==1.2.0)"]
-deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"]
-deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
-dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.19,<0.20)", "urllib3 (<2.0.0)"]
-dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"]
-flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
-ftfy = ["ftfy"]
-integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"]
-ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"]
-modelcreation = ["cookiecutter (==1.7.3)"]
-natten = ["natten (>=0.14.6,<0.15.0)"]
-onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"]
-onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
-optuna = ["optuna"]
-quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<2.0.0)"]
-ray = ["ray[tune] (>=2.7.0)"]
-retrieval = ["datasets (!=2.5.0)", "faiss-cpu"]
-sagemaker = ["sagemaker (>=2.31.0)"]
-sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"]
-serving = ["fastapi", "pydantic", "starlette", "uvicorn"]
-sigopt = ["sigopt"]
-sklearn = ["scikit-learn"]
-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
-testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
-tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
-tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
-tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
-timm = ["timm"]
-tokenizers = ["tokenizers (>=0.19,<0.20)"]
-torch = ["accelerate (>=0.21.0)", "torch"]
-torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
-torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
-torchhub = ["filelock", "huggingface-hub (>=0.23.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.19,<0.20)", "torch", "tqdm (>=4.27)"]
-video = ["av (==9.2.0)", "decord (==0.6.0)"]
-vision = ["Pillow (>=10.0.1,<=15.0)"]
-
-[[package]]
-name = "triton"
-version = "2.2.0"
-description = "A language and compiler for custom Deep Learning operations"
-optional = true
-python-versions = "*"
-files = [
- {file = "triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2294514340cfe4e8f4f9e5c66c702744c4a117d25e618bd08469d0bfed1e2e5"},
- {file = "triton-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da58a152bddb62cafa9a857dd2bc1f886dbf9f9c90a2b5da82157cd2b34392b0"},
- {file = "triton-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af58716e721460a61886668b205963dc4d1e4ac20508cc3f623aef0d70283d5"},
- {file = "triton-2.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8fe46d3ab94a8103e291bd44c741cc294b91d1d81c1a2888254cbf7ff846dab"},
- {file = "triton-2.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ce26093e539d727e7cf6f6f0d932b1ab0574dc02567e684377630d86723ace"},
- {file = "triton-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:227cc6f357c5efcb357f3867ac2a8e7ecea2298cd4606a8ba1e931d1d5a947df"},
-]
-
-[package.dependencies]
-filelock = "*"
-
-[package.extras]
-build = ["cmake (>=3.20)", "lit"]
-tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"]
-tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
-
[[package]]
name = "typer"
version = "0.9.0"
@@ -3216,17 +1766,6 @@ files = [
{file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"},
]
-[[package]]
-name = "tzdata"
-version = "2024.1"
-description = "Provider of IANA time zone data"
-optional = true
-python-versions = ">=2"
-files = [
- {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
- {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},
-]
-
[[package]]
name = "urllib3"
version = "2.0.7"
@@ -3342,123 +1881,6 @@ files = [
{file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
]
-[[package]]
-name = "xxhash"
-version = "3.4.1"
-description = "Python binding for xxHash"
-optional = true
-python-versions = ">=3.7"
-files = [
- {file = "xxhash-3.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f"},
- {file = "xxhash-3.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2"},
- {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb11628470a6004dc71a09fe90c2f459ff03d611376c1debeec2d648f44cb693"},
- {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bef2a7dc7b4f4beb45a1edbba9b9194c60a43a89598a87f1a0226d183764189"},
- {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c0f7b2d547d72c7eda7aa817acf8791f0146b12b9eba1d4432c531fb0352228"},
- {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00f2fdef6b41c9db3d2fc0e7f94cb3db86693e5c45d6de09625caad9a469635b"},
- {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23cfd9ca09acaf07a43e5a695143d9a21bf00f5b49b15c07d5388cadf1f9ce11"},
- {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6a9ff50a3cf88355ca4731682c168049af1ca222d1d2925ef7119c1a78e95b3b"},
- {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f1d7c69a1e9ca5faa75546fdd267f214f63f52f12692f9b3a2f6467c9e67d5e7"},
- {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:672b273040d5d5a6864a36287f3514efcd1d4b1b6a7480f294c4b1d1ee1b8de0"},
- {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4178f78d70e88f1c4a89ff1ffe9f43147185930bb962ee3979dba15f2b1cc799"},
- {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9804b9eb254d4b8cc83ab5a2002128f7d631dd427aa873c8727dba7f1f0d1c2b"},
- {file = "xxhash-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c09c49473212d9c87261d22c74370457cfff5db2ddfc7fd1e35c80c31a8c14ce"},
- {file = "xxhash-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:ebbb1616435b4a194ce3466d7247df23499475c7ed4eb2681a1fa42ff766aff6"},
- {file = "xxhash-3.4.1-cp310-cp310-win_arm64.whl", hash = "sha256:25dc66be3db54f8a2d136f695b00cfe88018e59ccff0f3b8f545869f376a8a46"},
- {file = "xxhash-3.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58c49083801885273e262c0f5bbeac23e520564b8357fbb18fb94ff09d3d3ea5"},
- {file = "xxhash-3.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b526015a973bfbe81e804a586b703f163861da36d186627e27524f5427b0d520"},
- {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36ad4457644c91a966f6fe137d7467636bdc51a6ce10a1d04f365c70d6a16d7e"},
- {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:248d3e83d119770f96003271fe41e049dd4ae52da2feb8f832b7a20e791d2920"},
- {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2070b6d5bbef5ee031666cf21d4953c16e92c2f8a24a94b5c240f8995ba3b1d0"},
- {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2746035f518f0410915e247877f7df43ef3372bf36cfa52cc4bc33e85242641"},
- {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a8ba6181514681c2591840d5632fcf7356ab287d4aff1c8dea20f3c78097088"},
- {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aac5010869240e95f740de43cd6a05eae180c59edd182ad93bf12ee289484fa"},
- {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4cb11d8debab1626181633d184b2372aaa09825bde709bf927704ed72765bed1"},
- {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b29728cff2c12f3d9f1d940528ee83918d803c0567866e062683f300d1d2eff3"},
- {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a15cbf3a9c40672523bdb6ea97ff74b443406ba0ab9bca10ceccd9546414bd84"},
- {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e66df260fed01ed8ea790c2913271641c58481e807790d9fca8bfd5a3c13844"},
- {file = "xxhash-3.4.1-cp311-cp311-win32.whl", hash = "sha256:e867f68a8f381ea12858e6d67378c05359d3a53a888913b5f7d35fbf68939d5f"},
- {file = "xxhash-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:200a5a3ad9c7c0c02ed1484a1d838b63edcf92ff538770ea07456a3732c577f4"},
- {file = "xxhash-3.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:1d03f1c0d16d24ea032e99f61c552cb2b77d502e545187338bea461fde253583"},
- {file = "xxhash-3.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c4bbba9b182697a52bc0c9f8ec0ba1acb914b4937cd4a877ad78a3b3eeabefb3"},
- {file = "xxhash-3.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9fd28a9da300e64e434cfc96567a8387d9a96e824a9be1452a1e7248b7763b78"},
- {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6066d88c9329ab230e18998daec53d819daeee99d003955c8db6fc4971b45ca3"},
- {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93805bc3233ad89abf51772f2ed3355097a5dc74e6080de19706fc447da99cd3"},
- {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64da57d5ed586ebb2ecdde1e997fa37c27fe32fe61a656b77fabbc58e6fbff6e"},
- {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a97322e9a7440bf3c9805cbaac090358b43f650516486746f7fa482672593df"},
- {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbe750d512982ee7d831838a5dee9e9848f3fb440e4734cca3f298228cc957a6"},
- {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fd79d4087727daf4d5b8afe594b37d611ab95dc8e29fe1a7517320794837eb7d"},
- {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:743612da4071ff9aa4d055f3f111ae5247342931dedb955268954ef7201a71ff"},
- {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b41edaf05734092f24f48c0958b3c6cbaaa5b7e024880692078c6b1f8247e2fc"},
- {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:a90356ead70d715fe64c30cd0969072de1860e56b78adf7c69d954b43e29d9fa"},
- {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac56eebb364e44c85e1d9e9cc5f6031d78a34f0092fea7fc80478139369a8b4a"},
- {file = "xxhash-3.4.1-cp312-cp312-win32.whl", hash = "sha256:911035345932a153c427107397c1518f8ce456f93c618dd1c5b54ebb22e73747"},
- {file = "xxhash-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:f31ce76489f8601cc7b8713201ce94b4bd7b7ce90ba3353dccce7e9e1fee71fa"},
- {file = "xxhash-3.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:b5beb1c6a72fdc7584102f42c4d9df232ee018ddf806e8c90906547dfb43b2da"},
- {file = "xxhash-3.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6d42b24d1496deb05dee5a24ed510b16de1d6c866c626c2beb11aebf3be278b9"},
- {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b685fab18876b14a8f94813fa2ca80cfb5ab6a85d31d5539b7cd749ce9e3624"},
- {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419ffe34c17ae2df019a4685e8d3934d46b2e0bbe46221ab40b7e04ed9f11137"},
- {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e041ce5714f95251a88670c114b748bca3bf80cc72400e9f23e6d0d59cf2681"},
- {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc860d887c5cb2f524899fb8338e1bb3d5789f75fac179101920d9afddef284b"},
- {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:312eba88ffe0a05e332e3a6f9788b73883752be63f8588a6dc1261a3eaaaf2b2"},
- {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e01226b6b6a1ffe4e6bd6d08cfcb3ca708b16f02eb06dd44f3c6e53285f03e4f"},
- {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9f3025a0d5d8cf406a9313cd0d5789c77433ba2004b1c75439b67678e5136537"},
- {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:6d3472fd4afef2a567d5f14411d94060099901cd8ce9788b22b8c6f13c606a93"},
- {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:43984c0a92f06cac434ad181f329a1445017c33807b7ae4f033878d860a4b0f2"},
- {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a55e0506fdb09640a82ec4f44171273eeabf6f371a4ec605633adb2837b5d9d5"},
- {file = "xxhash-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:faec30437919555b039a8bdbaba49c013043e8f76c999670aef146d33e05b3a0"},
- {file = "xxhash-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:c9e1b646af61f1fc7083bb7b40536be944f1ac67ef5e360bca2d73430186971a"},
- {file = "xxhash-3.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:961d948b7b1c1b6c08484bbce3d489cdf153e4122c3dfb07c2039621243d8795"},
- {file = "xxhash-3.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:719a378930504ab159f7b8e20fa2aa1896cde050011af838af7e7e3518dd82de"},
- {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74fb5cb9406ccd7c4dd917f16630d2e5e8cbbb02fc2fca4e559b2a47a64f4940"},
- {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5dab508ac39e0ab988039bc7f962c6ad021acd81fd29145962b068df4148c476"},
- {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c59f3e46e7daf4c589e8e853d700ef6607afa037bfad32c390175da28127e8c"},
- {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cc07256eff0795e0f642df74ad096f8c5d23fe66bc138b83970b50fc7f7f6c5"},
- {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9f749999ed80f3955a4af0eb18bb43993f04939350b07b8dd2f44edc98ffee9"},
- {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7688d7c02149a90a3d46d55b341ab7ad1b4a3f767be2357e211b4e893efbaaf6"},
- {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a8b4977963926f60b0d4f830941c864bed16aa151206c01ad5c531636da5708e"},
- {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8106d88da330f6535a58a8195aa463ef5281a9aa23b04af1848ff715c4398fb4"},
- {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4c76a77dbd169450b61c06fd2d5d436189fc8ab7c1571d39265d4822da16df22"},
- {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:11f11357c86d83e53719c592021fd524efa9cf024dc7cb1dfb57bbbd0d8713f2"},
- {file = "xxhash-3.4.1-cp38-cp38-win32.whl", hash = "sha256:0c786a6cd74e8765c6809892a0d45886e7c3dc54de4985b4a5eb8b630f3b8e3b"},
- {file = "xxhash-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:aabf37fb8fa27430d50507deeab2ee7b1bcce89910dd10657c38e71fee835594"},
- {file = "xxhash-3.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6127813abc1477f3a83529b6bbcfeddc23162cece76fa69aee8f6a8a97720562"},
- {file = "xxhash-3.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef2e194262f5db16075caea7b3f7f49392242c688412f386d3c7b07c7733a70a"},
- {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71be94265b6c6590f0018bbf73759d21a41c6bda20409782d8117e76cd0dfa8b"},
- {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10e0a619cdd1c0980e25eb04e30fe96cf8f4324758fa497080af9c21a6de573f"},
- {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa122124d2e3bd36581dd78c0efa5f429f5220313479fb1072858188bc2d5ff1"},
- {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17032f5a4fea0a074717fe33477cb5ee723a5f428de7563e75af64bfc1b1e10"},
- {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca7783b20e3e4f3f52f093538895863f21d18598f9a48211ad757680c3bd006f"},
- {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d77d09a1113899fad5f354a1eb4f0a9afcf58cefff51082c8ad643ff890e30cf"},
- {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:21287bcdd299fdc3328cc0fbbdeaa46838a1c05391264e51ddb38a3f5b09611f"},
- {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:dfd7a6cc483e20b4ad90224aeb589e64ec0f31e5610ab9957ff4314270b2bf31"},
- {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:543c7fcbc02bbb4840ea9915134e14dc3dc15cbd5a30873a7a5bf66039db97ec"},
- {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fe0a98d990e433013f41827b62be9ab43e3cf18e08b1483fcc343bda0d691182"},
- {file = "xxhash-3.4.1-cp39-cp39-win32.whl", hash = "sha256:b9097af00ebf429cc7c0e7d2fdf28384e4e2e91008130ccda8d5ae653db71e54"},
- {file = "xxhash-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:d699b921af0dcde50ab18be76c0d832f803034d80470703700cb7df0fbec2832"},
- {file = "xxhash-3.4.1-cp39-cp39-win_arm64.whl", hash = "sha256:2be491723405e15cc099ade1280133ccfbf6322d2ef568494fb7d07d280e7eee"},
- {file = "xxhash-3.4.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:431625fad7ab5649368c4849d2b49a83dc711b1f20e1f7f04955aab86cd307bc"},
- {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc6dbd5fc3c9886a9e041848508b7fb65fd82f94cc793253990f81617b61fe49"},
- {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ff8dbd0ec97aec842476cb8ccc3e17dd288cd6ce3c8ef38bff83d6eb927817"},
- {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef73a53fe90558a4096e3256752268a8bdc0322f4692ed928b6cd7ce06ad4fe3"},
- {file = "xxhash-3.4.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:450401f42bbd274b519d3d8dcf3c57166913381a3d2664d6609004685039f9d3"},
- {file = "xxhash-3.4.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a162840cf4de8a7cd8720ff3b4417fbc10001eefdd2d21541a8226bb5556e3bb"},
- {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b736a2a2728ba45017cb67785e03125a79d246462dfa892d023b827007412c52"},
- {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0ae4c2e7698adef58710d6e7a32ff518b66b98854b1c68e70eee504ad061d8"},
- {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6322c4291c3ff174dcd104fae41500e75dad12be6f3085d119c2c8a80956c51"},
- {file = "xxhash-3.4.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:dd59ed668801c3fae282f8f4edadf6dc7784db6d18139b584b6d9677ddde1b6b"},
- {file = "xxhash-3.4.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92693c487e39523a80474b0394645b393f0ae781d8db3474ccdcead0559ccf45"},
- {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4603a0f642a1e8d7f3ba5c4c25509aca6a9c1cc16f85091004a7028607ead663"},
- {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa45e8cbfbadb40a920fe9ca40c34b393e0b067082d94006f7f64e70c7490a6"},
- {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:595b252943b3552de491ff51e5bb79660f84f033977f88f6ca1605846637b7c6"},
- {file = "xxhash-3.4.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:562d8b8f783c6af969806aaacf95b6c7b776929ae26c0cd941d54644ea7ef51e"},
- {file = "xxhash-3.4.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:41ddeae47cf2828335d8d991f2d2b03b0bdc89289dc64349d712ff8ce59d0647"},
- {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c44d584afdf3c4dbb3277e32321d1a7b01d6071c1992524b6543025fb8f4206f"},
- {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd7bddb3a5b86213cc3f2c61500c16945a1b80ecd572f3078ddbbe68f9dabdfb"},
- {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ecb6c987b62437c2f99c01e97caf8d25660bf541fe79a481d05732e5236719c"},
- {file = "xxhash-3.4.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:696b4e18b7023527d5c50ed0626ac0520edac45a50ec7cf3fc265cd08b1f4c03"},
- {file = "xxhash-3.4.1.tar.gz", hash = "sha256:0379d6cf1ff987cd421609a264ce025e74f346e3e145dd106c0cc2e3ec3f99a9"},
-]
-
[[package]]
name = "yarl"
version = "1.9.4"
@@ -3577,10 +1999,7 @@ files = [
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"]
-[extras]
-mllib = ["accelerate", "datasets", "einops", "h5py", "peft", "safetensors", "transformers"]
-
[metadata]
lock-version = "2.0"
python-versions = "^3.8.1"
-content-hash = "cef02f25d2bdc395f2187bf7b01eabd560ba18e597bf50005dd90b80fa25336c"
+content-hash = "59be54627e27caf3aa6e089881036b45a65705fcd5f31c9165ddc203930d526d"
diff --git a/pyproject.toml b/pyproject.toml
index dc20e960..a6246f77 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "friendli-client"
-version = "1.4.2"
+version = "1.5.0"
description = "Client of Friendli Suite."
license = "Apache-2.0"
authors = ["FriendliAI teams "]
@@ -34,16 +34,9 @@ rich = "^12.2.0"
jsonschema = "^4.17.3"
tqdm = "^4.48.0"
pydantic = {extras = ["email"], version = ">=1.9.0, <3"}
-transformers = { version = "4.41.2", optional = true }
-h5py = { version = "^3.9.0", optional = true }
-einops = { version = "^0.6.1", optional = true }
-accelerate = { version = "0.21.0", optional = true }
-datasets = { version = "2.16.0", optional = true }
injector = "^0.21.0"
protobuf = "^5.26.1"
types-protobuf = "^5.26.0.20240422"
-peft = { version = "0.6.0", optional = true }
-safetensors = { version = "0.4.1", optional = true }
httpx = "^0.24.1"
fastapi = "^0.104.0"
uvicorn = "^0.23.2"
@@ -75,9 +68,6 @@ types-toml = "^0.10.8.6"
types-tqdm = "^4.65.0.1"
typer = "^0.9.0"
-[tool.poetry.extras]
-mllib = ["transformers", "h5py", "accelerate", "einops", "datasets", "peft", "safetensors"]
-
[tool.isort]
profile = "black"
known_local_folder = ["tests"]
@@ -122,12 +112,6 @@ disable = [
]
extension-pkg-whitelist = "pydantic"
-[tool.pylint.TYPECHECK]
-generated-members = [
- "numpy.*" ,
- "torch.*"
-]
-
[tool.pylint.check]
ignored-classes = "Depends"
ignore-patterns = [
diff --git a/tests/unit_tests/modules/__init__.py b/tests/unit_tests/modules/__init__.py
deleted file mode 100644
index 1fc4d985..00000000
--- a/tests/unit_tests/modules/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
diff --git a/tests/unit_tests/modules/conftest.py b/tests/unit_tests/modules/conftest.py
deleted file mode 100644
index f79aaa6a..00000000
--- a/tests/unit_tests/modules/conftest.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-from __future__ import annotations
-
-from typing import Any, Dict
-
-import pytest
-from peft import PeftConfig
-from transformers import (
- AutoConfig,
- BlenderbotConfig,
- BloomConfig,
- CodeGenConfig,
- FalconConfig,
- GPT2Config,
- GPTJConfig,
- GPTNeoXConfig,
- LlamaConfig,
- MistralConfig,
- MixtralConfig,
- MptConfig,
- OPTConfig,
- T5Config,
-)
-from transformers.models.mpt.configuration_mpt import MptAttentionConfig
-
-from friendli.enums import ModelDataType
-from friendli.modules.converter.base import OneOfConverter
-from friendli.modules.converter.maps import get_hf_converter_factory
-from friendli.modules.converter.models.mixtral import MixtralForCausalLMConverter
-from friendli.modules.converter.utils import get_model_arch
-
-from tests.unit_tests.modules.helpers.utils import ModelConfig, get_param_specs
-
-model_name_config_map = {
- "blenderbot": BlenderbotConfig(
- architectures=["BlenderbotForConditionalGeneration"],
- activation_function="gelu",
- tie_word_embeddings=True,
- decoder_attention_heads=32,
- encoder_attention_heads=32,
- decoder_ffn_dim=10240,
- encoder_ffn_dim=10240,
- encoder_layers=1,
- decoder_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "bloom": BloomConfig(
- architectures=["BloomForCausalLM"],
- apply_residual_connection_post_layernorm=False,
- slow_but_exact=False,
- tie_word_embeddings=True,
- layer_norm_epsilon=1e-5,
- n_layer=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "codegen": CodeGenConfig(
- architectures=["CodeGenForCausalLM"],
- activation_function="gelu",
- tie_word_embeddings=False,
- layer_norm_epsilon=1e-5,
- n_layer=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "falcon_7b": FalconConfig( # falcon-7b
- architectures=["FalconForCausalLM"],
- alibi=False,
- bias=False,
- new_decoder_architecture=False,
- parallel_attn=True,
- num_hidden_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "falcon": FalconConfig( # falcon-40b
- architectures=["FalconForCausalLM"],
- alibi=False,
- bias=False,
- new_decoder_architecture=True,
- num_hidden_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "gpt_neox": GPTNeoXConfig( # pythia-1.4b
- architectures=["GPTNeoXForCausalLM"],
- hidden_act="gelu",
- use_parallel_residual=True,
- tie_word_embeddings=False,
- layer_norm_eps=1e-5,
- rotary_emb_base=10000,
- num_hidden_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "gpt": GPT2Config(
- architectures=["GPT2LMHeadModel"],
- activation_function="gelu",
- scale_attn_by_inverse_layer_idx=False,
- tie_word_embeddings=True,
- layer_norm_epsilon=1e-5,
- n_layer=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "gpt_j": GPTJConfig( # gpt-j-6b
- architectures=["GPTJForCausalLM"],
- tie_word_embeddings=False,
- layer_norm_epsilon=1e-5,
- n_layer=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "llama": LlamaConfig(
- architectures=["LlamaForCausalLM"],
- hidden_act="silu",
- tie_word_embeddings=False,
- rms_norm_eps=1e-5,
- num_hidden_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "mpt": MptConfig(
- architectures=["MPTForCausalLM"],
- attn_config=MptAttentionConfig(
- alibi=True,
- alibi_bias_max=8,
- attn_type="multihead_attention",
- prefix_lm=False,
- qk_ln=False,
- softmax_scale=None,
- ),
- expansion_ratio=4,
- no_bias=True,
- logit_scale=None,
- n_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "opt": OPTConfig(
- architectures=["OPTForCausalLM"],
- activation_function="relu",
- do_layer_norm_before=True,
- word_embed_proj_dim=768,
- hidden_size=768,
- _remove_first_dropout=False,
- tie_word_embeddings=True,
- num_hidden_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "t5_v1_1": T5Config(
- architectures=["T5ForConditionalGeneration"],
- is_gated_act=True,
- tie_word_embeddings=False,
- num_hidden_layers=1,
- num_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- relative_attention_num_buckets=32, # fixed value for t5
- ),
- "t5": T5Config(
- architectures=["T5ForConditionalGeneration"],
- is_gated_act=False,
- tie_word_embeddings=True,
- layer_norm_epsilon=1e-6,
- num_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- relative_attention_num_buckets=32, # fixed value for t5
- ),
- "mistral": MistralConfig( # same as llama architecture
- architectures=["MistralForCausalLM"],
- hidden_act="silu",
- tie_word_embeddings=False,
- rope_theta=10000.0,
- rms_norm_eps=1e-5,
- num_hidden_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- "mixtral": MixtralConfig( # same as llama architecture
- architectures=["MixtralForCausalLM"],
- hidden_act="silu",
- tie_word_embeddings=False,
- rope_theta=10000.0,
- rms_norm_eps=1e-5,
- num_hidden_layers=1,
- vocab_size=10000,
- max_position_embeddings=1024,
- ),
- # TODO: add phi_msft
-    # TODO: add mpt with grouped query attention (e.g. replit-code)
-}
-
-
-@pytest.fixture
-def converter(model_config: AutoConfig) -> OneOfConverter:
- model_arch = get_model_arch(model_config)
- _, converter_cls = get_hf_converter_factory(model_arch)
- return converter_cls(model_config, None, ModelDataType.FP16)
-
-
-# TODO: add render_model_config per model
-@pytest.fixture
-def render_model_config(converter: OneOfConverter) -> ModelConfig:
- return ModelConfig(
- dtype="float16",
- num_decoder_layers=converter.decoder_layer_num,
- hidden_size=converter.decoder_hidden_size,
- num_heads=converter.decoder_num_attention_heads,
- num_kv_heads=converter.decoder_num_kv_attention_heads,
- head_size=converter.decoder_head_size,
- num_encoder_layers=converter.decoder_layer_num, # same as decoder for test
- ff_intermediate_size=converter.decoder_ff_intermediate_size,
- num_experts=converter.num_experts
- if isinstance(converter, MixtralForCausalLMConverter)
- else None,
- )
-
-
-@pytest.fixture
-def spec_data(model_name: str, render_model_config: ModelConfig) -> Dict[str, Any]:
- param_specs = get_param_specs(model_name, "models", render_model_config)
- return param_specs
diff --git a/tests/unit_tests/modules/helpers/__init__.py b/tests/unit_tests/modules/helpers/__init__.py
deleted file mode 100644
index 1fc4d985..00000000
--- a/tests/unit_tests/modules/helpers/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
diff --git a/tests/unit_tests/modules/helpers/spec.py b/tests/unit_tests/modules/helpers/spec.py
deleted file mode 100644
index 127d8d59..00000000
--- a/tests/unit_tests/modules/helpers/spec.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-"""Model spec utils"""
-
-from __future__ import annotations
-
-from enum import Enum
-from pathlib import Path
-from typing import Any, Dict, List, Tuple, Union
-
-import numpy as np
-import yaml
-from jinja2.environment import Template as JinjaTemplate
-from pydantic import BaseModel
-
-from friendli.utils.compat import model_parse
-
-
-class InvalidSpecFormatError(Exception):
-    """Raised when the model spec format is invalid; intended to be handled by users."""
-
-
-class SpecNodeType(str, Enum):
- """Model spec node type."""
-
- DATA = "data"
- GROUP = "group"
- REPEAT_GROUP = "repeat_group"
-
-
-class ParamInfo(BaseModel):
- """Parameter info."""
-
- name: str
- dtype: np.dtype
- shape: Tuple[int, ...]
-
- class Config:
- arbitrary_types_allowed = (
-            True  # allow np.dtype; validation only checks isinstance(dtype, np.dtype)
- )
-
- @classmethod
- def load(cls, name: str, data: Dict[str, Any]) -> ParamInfo:
- """Load a param info from data.
-
- Args:
- name (str): Name of parameter.
- data (dict[str, Any]): A dictionary describing the parameter info.
-
- Raises:
- InvalidSpecFormatError: Raised if required key does not exist in data.
-
- Returns:
- ParamInfo: Loaded param info.
-
- """
- try:
- dtype = np.dtype(data["dtype"])
- return ParamInfo(
- name=name,
- dtype=dtype,
- shape=tuple(map(int, data["shape"])),
- )
- except (KeyError, AttributeError, TypeError) as exc:
- raise InvalidSpecFormatError from exc
-
-
-class RepeatRange(BaseModel):
- """Repeat group's repeat range."""
-
- lo: int
- hi: int
-
-
-class Template:
- """Renderable YAML template."""
-
- def __init__(self, jinja_template: JinjaTemplate):
- self._jinja2_template = jinja_template
-
- @classmethod
- def from_file(cls, path: Union[str, Path]) -> Template:
- with open(path, "r") as f:
- return cls(jinja_template=JinjaTemplate(f.read()))
-
- def render(self, **kwargs) -> Dict[str, Any] | List[Dict[str, Any]]:
-        """Render a Jinja2-YAML template, filling in the variables.
-
- Returns:
-            dict[str, Any] | list[dict[str, Any]]: Rendered template parsed into Python objects.
-
- """
- return yaml.safe_load(self._jinja2_template.render(**kwargs))
-
-
-class ModelSpecParser:
- """Model spec parser"""
-
- def __init__(self, model_spec: Dict[str, Any]) -> None:
-        """Initialize the model spec parser.
-
- Args:
- model_spec (dict[str, Any]): A dictionary describing the entire model spec.
-
- """
- self._model_spec = model_spec
-
-    def get_all_param_info(self) -> Dict[str, ParamInfo]:
-        """Get all parameter info specified in the model spec.
-
-        Returns:
-            Dict[str, ParamInfo]: A dictionary of param info keyed by parameter name.
-
- """
- return self._get_param_info(self._model_spec)
-
- def _get_param_info(
- self, spec: Dict[str, Any], name_prefix: str = ""
-    ) -> Dict[str, ParamInfo]:
-        """Recursively collect a dictionary of param info.
-
- Args:
- spec (dict[str, Any]): Full or partial model spec.
-            name_prefix (str, optional): Name accumulated up to the current recursion step. Defaults to "".
-
-        Returns:
-            Dict[str, ParamInfo]: A dictionary of param info keyed by parameter name.
-
- """
- try:
- node_type = spec["type"]
- except KeyError as exc:
- raise InvalidSpecFormatError from exc
-
- if node_type == SpecNodeType.DATA:
- return {name_prefix: ParamInfo.load(name=name_prefix, data=spec)}
- if node_type == SpecNodeType.GROUP:
- res = {}
- for child_name, child_spec in spec.items():
- if child_name == "type":
- continue
- res.update(
- self._get_param_info(
- spec=child_spec,
- name_prefix=f"{name_prefix}/{child_name}"
- if name_prefix
- else child_name,
- )
- )
- return res
- if node_type == SpecNodeType.REPEAT_GROUP:
- try:
- repeat_range = model_parse(RepeatRange, spec["range"]) # type: ignore
- except KeyError as exc:
- raise InvalidSpecFormatError from exc
- res = {}
-
- for i in range(repeat_range.lo, repeat_range.hi + 1):
- for child_name, child_spec in spec.items():
- if child_name in ["type", "range"]:
- continue
- res.update(
- self._get_param_info(
- spec=child_spec,
- name_prefix=f"{name_prefix.replace('*', str(i))}/{child_name}"
- if name_prefix
- else child_name,
- )
- )
- return res
- raise InvalidSpecFormatError
diff --git a/tests/unit_tests/modules/helpers/utils.py b/tests/unit_tests/modules/helpers/utils.py
deleted file mode 100644
index 1de9b23d..00000000
--- a/tests/unit_tests/modules/helpers/utils.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-from __future__ import annotations
-
-import os
-from dataclasses import fields
-from typing import Dict, Optional
-from unittest.mock import Mock
-
-import numpy as np
-import torch
-from accelerate import init_empty_weights
-from peft import PeftConfig, PeftModel
-from pydantic import BaseModel
-from transformers import PretrainedConfig
-
-from friendli.enums import ModelDataType
-from friendli.modules.converter.maps import (
- get_adapter_converter_factory,
- get_hf_converter_factory,
-)
-from friendli.modules.converter.utils import get_model_arch
-from friendli.modules.quantizer.awq.base import AWQQuantizer
-from friendli.modules.quantizer.layers import (
- WeightActQuantizedLinearLayer,
- WeightOnlyQuantizedLinearLayer,
-)
-from friendli.modules.quantizer.schema.config import AWQConfig
-from friendli.modules.quantizer.schema.data import QuantInput
-from friendli.modules.quantizer.smoothquant.base import SmoothQuantQuantizer
-from friendli.utils.compat import model_dump
-
-from tests.unit_tests.modules.helpers.spec import ModelSpecParser, ParamInfo, Template
-
-SPEC_PATH_PREFIX = os.path.join(
- os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "specs/"
-)
-
-
-class ModelConfig(BaseModel):
- """Adjustable model config."""
-
- dtype: str
- num_decoder_layers: int
- hidden_size: int
- num_encoder_layers: Optional[int] = None
- ff_intermediate_size: Optional[int] = None
- num_heads: Optional[int] = None
- num_kv_heads: Optional[int] = None
- head_size: Optional[int] = None
- seq_len: Optional[int] = 1024
- vocab_size: Optional[int] = 10000
- num_experts: Optional[int] = 8
-
-
-class LoraAdapterConfig(ModelConfig):
-    """Adjustable LoRA adapter config."""
-
- lora_rank_dim: int
-
-
-class AWQModelConfig(ModelConfig):
- """Adjustable model config for AWQ."""
-
- group_size: int = 1
- q_dtype: str = "int8"
-
-
-class SmoothQuantModelConfig(ModelConfig):
- """Adjustable model config for SmoothQuant."""
-
- attn_fc_smoothing: bool = False
- ff2_smoothing: bool = False
- q_dtype: str = "int8"
-
-
-def get_numpy_data_type(data_type: ModelDataType) -> np.dtype:
- if data_type == ModelDataType.FP32:
- return np.float32
- elif data_type == ModelDataType.FP16:
- return np.float16
- elif data_type == ModelDataType.BF16:
- return np.uint32
- else:
- return np.int8
-
-
-def get_param_specs(
- model_name: str, spec_folder: str, model_config: ModelConfig
-) -> Dict[str, ParamInfo]:
- file_path = f"{SPEC_PATH_PREFIX}{spec_folder}/{model_name}.yaml"
- template = Template.from_file(file_path)
- render_config = model_dump(model_config)
- rendered = template.render(**render_config)
- assert isinstance(rendered, dict)
- parser = ModelSpecParser(model_spec=rendered)
- param_specs = parser.get_all_param_info()
- return param_specs
-
-
-def get_meta_model(
- model_config: PretrainedConfig,
-) -> torch.nn.Module:
- model_arch = get_model_arch(model_config)
- model_factory, _ = get_hf_converter_factory(model_arch)
- with init_empty_weights():
- model = model_factory(config=model_config)
- return model
-
-
-def get_meta_model_with_adapter(
- model_config: PretrainedConfig, adapter_config: PeftConfig
-) -> torch.nn.Module:
- model_arch = get_model_arch(model_config)
- model_factory, _ = get_hf_converter_factory(model_arch)
- with init_empty_weights():
- model = model_factory(config=model_config)
- PeftModel(model, adapter_config)
- return model
-
-
-def get_smoothquant_quantized_meta_model(
- model_config: PretrainedConfig, quantizer: SmoothQuantQuantizer
-):
- model = get_meta_model(model_config)
- model = quantizer.hook.pre_smooth(model).to("meta")
-
- def weight_act_quant_layer(quant_input: QuantInput):
- weight, start, end = (
- quant_input.weight,
- quant_input.start_offset,
- quant_input.end_offset,
- )
- weight = weight[start:end]
- return WeightActQuantizedLinearLayer( # meta quantized linear layer
- in_features=weight.size(1),
- out_features=weight.size(0),
- q_weight=weight,
- weight_scale=torch.zeros(weight.size(1), device="meta"),
- act_scale=torch.zeros(weight.size(1), device="meta"),
- )
-
- for tf_quant_input in quantizer.hook.iter_tf_quant_inputs(model):
- for field in fields(tf_quant_input):
- quant_input = getattr(tf_quant_input, field.name)
- if isinstance(quant_input, QuantInput):
- weight_act_quant_layer = Mock(side_effect=weight_act_quant_layer)
- q_layer = weight_act_quant_layer(quant_input)
- tf_quant_input.block.add_module(field.name, q_layer)
-
- return model
-
-
-def get_awq_quantized_meta_model(
- model_config: PretrainedConfig, quantizer: AWQQuantizer, quant_config: AWQConfig
-):
- model = get_meta_model(model_config)
- model = quantizer.hook.add_pre_scaler(model).to("meta")
-
- def weight_act_quant_layer(quant_input: QuantInput):
- weight, start, end = (
- quant_input.weight,
- quant_input.start_offset,
- quant_input.end_offset,
- )
- w = weight[start:end]
- out_dim = w.size(0)
- in_dim = w.size(1)
- num_groups = in_dim // quant_config.awq_args.quant_group_size
- return WeightOnlyQuantizedLinearLayer( # meta quantized linear layer
- in_features=in_dim,
- out_features=out_dim,
- q_weight=w,
- weight_scale=torch.zeros((num_groups, out_dim), device="meta"),
- zeros=torch.zeros((num_groups, out_dim), device="meta"),
- )
-
- for tf_quant_input in quantizer.hook.iter_tf_quant_inputs(model):
- for field in fields(tf_quant_input):
- quant_input = getattr(tf_quant_input, field.name)
- if isinstance(quant_input, QuantInput):
-                weight_only_quantizer = Mock(side_effect=weight_act_quant_layer)
-                q_layer = weight_only_quantizer(quant_input)
- tf_quant_input.block.add_module(field.name, q_layer)
-
- return model
diff --git a/tests/unit_tests/modules/specs/awq/gpt_j.yaml b/tests/unit_tests/modules/specs/awq/gpt_j.yaml
deleted file mode 100644
index 21ee18a5..00000000
--- a/tests/unit_tests/modules/specs/awq/gpt_j.yaml
+++ /dev/null
@@ -1,162 +0,0 @@
-# Jinja2 template to validate GPT-J model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 3 | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 4 | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-      awq:
-        type: group
-        pre_scale:0:
-          type: data
-          dtype: float32
-          shape:
-            - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 // group_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/awq/gpt_neox.yaml b/tests/unit_tests/modules/specs/awq/gpt_neox.yaml
deleted file mode 100644
index ca93fd0f..00000000
--- a/tests/unit_tests/modules/specs/awq/gpt_neox.yaml
+++ /dev/null
@@ -1,175 +0,0 @@
-# Jinja2 template to validate GPT-NeoX model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 3 | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- awq:
- type: group
- pre_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 4 | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- awq:
- type: group
- pre_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 // group_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/awq/llama.yaml b/tests/unit_tests/modules/specs/awq/llama.yaml
deleted file mode 100644
index 71984acc..00000000
--- a/tests/unit_tests/modules/specs/awq/llama.yaml
+++ /dev/null
@@ -1,157 +0,0 @@
-# Jinja2 template to validate LLaMA model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- c_gate:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size // group_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/awq/mistral.yaml b/tests/unit_tests/modules/specs/awq/mistral.yaml
deleted file mode 100644
index 71984acc..00000000
--- a/tests/unit_tests/modules/specs/awq/mistral.yaml
+++ /dev/null
@@ -1,157 +0,0 @@
-# Jinja2 template to validate Mistral model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- c_gate:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size // group_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/awq/mpt.yaml b/tests/unit_tests/modules/specs/awq/mpt.yaml
deleted file mode 100644
index 9a1c736f..00000000
--- a/tests/unit_tests/modules/specs/awq/mpt.yaml
+++ /dev/null
@@ -1,137 +0,0 @@
-# Jinja2 template to validate MPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 3 | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- awq:
- type: group
- pre_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- awq:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 4 | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size // group_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- awq:
- type: group
- pre_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- scale:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 // group_size | int }}
- - {{ hidden_size | int }}
- zero:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 // group_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/lora/llama.yaml b/tests/unit_tests/modules/specs/lora/llama.yaml
deleted file mode 100644
index 74d9de3c..00000000
--- a/tests/unit_tests/modules/specs/lora/llama.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-# Jinja2 template to validate Llama model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- lora:
- type: group
- query_A:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ lora_rank_dim | int }}
- query_B:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ lora_rank_dim | int }}
- - {{ num_heads * head_size | int }}
- key_A:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ lora_rank_dim | int }}
- key_B:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ lora_rank_dim | int }}
- - {{ num_kv_heads * head_size | int }}
- c_proj:
- type: group
- lora:
- type: group
- lora_A:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ num_heads * head_size | int }}
- - {{ lora_rank_dim | int }}
- lora_B:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ lora_rank_dim | int }}
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- lora:
- type: group
- lora_A:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ lora_rank_dim | int }}
- lora_B:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ lora_rank_dim | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- lora:
- type: group
- lora_A:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ lora_rank_dim | int }}
- lora_B:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ lora_rank_dim | int }}
- - {{ hidden_size | int }}
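The LoRA spec above follows the standard low-rank decomposition: each adapted projection gets an `A` matrix of shape `[in_features, lora_rank_dim]` and a `B` matrix of shape `[lora_rank_dim, out_features]`, so their product has the shape of the original weight. A small sketch of that shape relationship, assuming illustrative dimensions (the function name is not taken from the removed tests):

```python
import numpy as np

def lora_delta(in_features: int, out_features: int, rank: int) -> np.ndarray:
    """Compose a LoRA update with the shapes used in the spec above:
    A: [in_features, rank], B: [rank, out_features] -> delta W: [in_features, out_features]."""
    lora_a = np.zeros((in_features, rank), dtype=np.float32)   # e.g. query_A / lora_A weight
    lora_b = np.zeros((rank, out_features), dtype=np.float32)  # e.g. query_B / lora_B weight
    return lora_a @ lora_b

# A rank-16 adapter on a 4096 -> 11008 MLP up-projection keeps the original weight shape.
assert lora_delta(4096, 11008, rank=16).shape == (4096, 11008)
```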
diff --git a/tests/unit_tests/modules/specs/lora/mpt.yaml b/tests/unit_tests/modules/specs/lora/mpt.yaml
deleted file mode 100644
index 4bd1083d..00000000
--- a/tests/unit_tests/modules/specs/lora/mpt.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-# Jinja2 template to validate MPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- lora:
- type: group
- lora_A:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ lora_rank_dim | int}}
- lora_B:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ lora_rank_dim | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
-
diff --git a/tests/unit_tests/modules/specs/models/ phi_msft.yaml b/tests/unit_tests/modules/specs/models/ phi_msft.yaml
deleted file mode 100644
index 15d7f42b..00000000
--- a/tests/unit_tests/modules/specs/models/ phi_msft.yaml
+++ /dev/null
@@ -1,111 +0,0 @@
-# Jinja2 template to validate phi-msft model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/blenderbot.yaml b/tests/unit_tests/modules/specs/models/blenderbot.yaml
deleted file mode 100644
index 02dc7d93..00000000
--- a/tests/unit_tests/modules/specs/models/blenderbot.yaml
+++ /dev/null
@@ -1,243 +0,0 @@
-# Jinja2 template to validate Blenderbot model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- cross_attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 3
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ seq_len | int }}
- - {{ hidden_size | int }}
-encoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_encoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ seq_len | int }}
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/bloom.yaml b/tests/unit_tests/modules/specs/models/bloom.yaml
deleted file mode 100644
index cb5539f9..00000000
--- a/tests/unit_tests/modules/specs/models/bloom.yaml
+++ /dev/null
@@ -1,113 +0,0 @@
-# Jinja2 template to validate Bloom model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-wte:
- type: group
- ln:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/codegen.yaml b/tests/unit_tests/modules/specs/models/codegen.yaml
deleted file mode 100644
index 3e906ec4..00000000
--- a/tests/unit_tests/modules/specs/models/codegen.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-# Jinja2 template to validate Codegen model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/falcon.yaml b/tests/unit_tests/modules/specs/models/falcon.yaml
deleted file mode 100644
index cb723f76..00000000
--- a/tests/unit_tests/modules/specs/models/falcon.yaml
+++ /dev/null
@@ -1,89 +0,0 @@
-# Jinja2 template to validate Falcon model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/falcon_7b.yaml b/tests/unit_tests/modules/specs/models/falcon_7b.yaml
deleted file mode 100644
index ca85b0a4..00000000
--- a/tests/unit_tests/modules/specs/models/falcon_7b.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-# Jinja2 template to validate Falcon 7B model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/gpt.yaml b/tests/unit_tests/modules/specs/models/gpt.yaml
deleted file mode 100644
index a8e6ff2e..00000000
--- a/tests/unit_tests/modules/specs/models/gpt.yaml
+++ /dev/null
@@ -1,109 +0,0 @@
-# Jinja2 template to validate GPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ seq_len | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/gpt_j.yaml b/tests/unit_tests/modules/specs/models/gpt_j.yaml
deleted file mode 100644
index 3417f790..00000000
--- a/tests/unit_tests/modules/specs/models/gpt_j.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-# Jinja2 template to validate GPT-J model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/gpt_neox.yaml b/tests/unit_tests/modules/specs/models/gpt_neox.yaml
deleted file mode 100644
index 93341f45..00000000
--- a/tests/unit_tests/modules/specs/models/gpt_neox.yaml
+++ /dev/null
@@ -1,109 +0,0 @@
-# Jinja2 template to validate GPT-NeoX model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/llama.yaml b/tests/unit_tests/modules/specs/models/llama.yaml
deleted file mode 100644
index d0f2266e..00000000
--- a/tests/unit_tests/modules/specs/models/llama.yaml
+++ /dev/null
@@ -1,87 +0,0 @@
-# Jinja2 template to validate LLaMA model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_gate:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/mistral.yaml b/tests/unit_tests/modules/specs/models/mistral.yaml
deleted file mode 100644
index d0f2266e..00000000
--- a/tests/unit_tests/modules/specs/models/mistral.yaml
+++ /dev/null
@@ -1,87 +0,0 @@
-# Jinja2 template to validate Mistral model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_gate:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/mixtral.yaml b/tests/unit_tests/modules/specs/models/mixtral.yaml
deleted file mode 100644
index d0d79b01..00000000
--- a/tests/unit_tests/modules/specs/models/mixtral.yaml
+++ /dev/null
@@ -1,102 +0,0 @@
-# Jinja2 template to validate Mixtral model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- moe:
- type: group
- router:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int}}
- - {{ num_experts | int }}
- '*':
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_experts - 1 | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_gate:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
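Compared with the dense LLaMA/Mistral specs, the Mixtral template above adds a `moe` group: a router weight of shape `[hidden_size, num_experts]` plus a `repeat_group` of per-expert MLPs. The router projects each hidden state onto expert logits, from which the top-k experts (top-2 for Mixtral) are selected per token. A rough sketch of that routing shape, with purely illustrative sizes:

```python
import numpy as np

hidden_size, num_experts, num_tokens = 4096, 8, 4           # illustrative sizes only

router_w = np.random.randn(hidden_size, num_experts).astype(np.float32)   # moe.router.weight:0
hidden_states = np.random.randn(num_tokens, hidden_size).astype(np.float32)

logits = hidden_states @ router_w                            # [num_tokens, num_experts]
top_k = np.argsort(logits, axis=-1)[:, -2:]                  # indices of the 2 best experts per token
assert logits.shape == (num_tokens, num_experts) and top_k.shape == (num_tokens, 2)
```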
diff --git a/tests/unit_tests/modules/specs/models/mpt.yaml b/tests/unit_tests/modules/specs/models/mpt.yaml
deleted file mode 100644
index 701c56d2..00000000
--- a/tests/unit_tests/modules/specs/models/mpt.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-# Jinja2 template to validate MPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/opt.yaml b/tests/unit_tests/modules/specs/models/opt.yaml
deleted file mode 100644
index 2bc76839..00000000
--- a/tests/unit_tests/modules/specs/models/opt.yaml
+++ /dev/null
@@ -1,117 +0,0 @@
-# Jinja2 template to validate OPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ seq_len | int }}
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/t5.yaml b/tests/unit_tests/modules/specs/models/t5.yaml
deleted file mode 100644
index 3f7b88fb..00000000
--- a/tests/unit_tests/modules/specs/models/t5.yaml
+++ /dev/null
@@ -1,165 +0,0 @@
-# Jinja2 template to validate T5 model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- cross_attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 3
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: float32
- shape:
- - {{ 32 | int }}
- - {{ num_heads | int }}
-encoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_encoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: float32
- shape:
- - {{ 32 | int }}
- - {{ num_heads | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/models/t5_v1_1.yaml b/tests/unit_tests/modules/specs/models/t5_v1_1.yaml
deleted file mode 100644
index 3b99f73c..00000000
--- a/tests/unit_tests/modules/specs/models/t5_v1_1.yaml
+++ /dev/null
@@ -1,189 +0,0 @@
-# Jinja2 template to validate T5 (t5-v1_1) model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- cross_attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 3
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_gate:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: float32
- shape:
- - {{ 32 | int }}
- - {{ num_heads | int }}
-encoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_encoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 3 | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_gate:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- c_proj:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: float32
- shape:
- - {{ 32 | int }}
- - {{ num_heads | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/bloom.yaml b/tests/unit_tests/modules/specs/smoothquant/bloom.yaml
deleted file mode 100644
index c8a90282..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/bloom.yaml
+++ /dev/null
@@ -1,215 +0,0 @@
-# Jinja2 template to validate Bloom model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- smoothquant:
- type: group
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-wte:
- type: group
- ln:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/codegen.yaml b/tests/unit_tests/modules/specs/smoothquant/codegen.yaml
deleted file mode 100644
index 87013ae5..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/codegen.yaml
+++ /dev/null
@@ -1,215 +0,0 @@
-# Jinja2 template to validate Codegen model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- ln_2:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/falcon.yaml b/tests/unit_tests/modules/specs/smoothquant/falcon.yaml
deleted file mode 100644
index 91b828ac..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/falcon.yaml
+++ /dev/null
@@ -1,191 +0,0 @@
-# Jinja2 template to validate Falcon model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/falcon_7b.yaml b/tests/unit_tests/modules/specs/smoothquant/falcon_7b.yaml
deleted file mode 100644
index 0570e118..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/falcon_7b.yaml
+++ /dev/null
@@ -1,188 +0,0 @@
-# Jinja2 template to validate Falcon 7B model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ num_heads * head_size | int }}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ num_kv_heads * head_size | int }}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ num_kv_heads * head_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/gpt.yaml b/tests/unit_tests/modules/specs/smoothquant/gpt.yaml
deleted file mode 100644
index a57b3952..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/gpt.yaml
+++ /dev/null
@@ -1,211 +0,0 @@
-# Jinja2 template to validate GPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ seq_len | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/gpt_j.yaml b/tests/unit_tests/modules/specs/smoothquant/gpt_j.yaml
deleted file mode 100644
index dad7a61e..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/gpt_j.yaml
+++ /dev/null
@@ -1,215 +0,0 @@
-# Jinja2 template to validate GPT-J model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_1:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- ln_2:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/gpt_neox.yaml b/tests/unit_tests/modules/specs/smoothquant/gpt_neox.yaml
deleted file mode 100644
index 08230d20..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/gpt_neox.yaml
+++ /dev/null
@@ -1,211 +0,0 @@
-# Jinja2 template to validate GPT-NeoX model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/llama.yaml b/tests/unit_tests/modules/specs/smoothquant/llama.yaml
deleted file mode 100644
index f29d2f4e..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/llama.yaml
+++ /dev/null
@@ -1,206 +0,0 @@
-# Jinja2 template to validate LLaMA model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ (num_kv_heads * 2 + num_heads) * head_size | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ num_heads * head_size | int }}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ num_kv_heads * head_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ num_kv_heads * head_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ ff_intermediate_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_gate:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ ff_intermediate_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ ff_intermediate_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ ff_intermediate_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ ff_intermediate_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ ff_intermediate_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ ff_intermediate_size | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/mpt.yaml b/tests/unit_tests/modules/specs/smoothquant/mpt.yaml
deleted file mode 100644
index 1d520f2c..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/mpt.yaml
+++ /dev/null
@@ -1,173 +0,0 @@
-# Jinja2 template to validate MPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- ln_f:
- type: group
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/specs/smoothquant/opt.yaml b/tests/unit_tests/modules/specs/smoothquant/opt.yaml
deleted file mode 100644
index cb76b1f8..00000000
--- a/tests/unit_tests/modules/specs/smoothquant/opt.yaml
+++ /dev/null
@@ -1,219 +0,0 @@
-# Jinja2 template to validate OPT model in Friendli format.
-
-type: group
-decoder:
- type: group
- h_._*:
- type: repeat_group
- range:
- lo: 0
- hi: {{ num_decoder_layers - 1 | int }}
- attn:
- type: group
- c_attn:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 3 | int }}
- - {{ hidden_size | int }}
- q_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- q_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- k_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- v_out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int}}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if attn_fc_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- ln_*:
- type: repeat_group
- range:
- lo: 1
- hi: 2
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- mlp:
- type: group
- c_fc:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- smoothquant:
- type: group
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size * 4 | int }}
- - {{ hidden_size | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- c_proj:
- type: group
- bias:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- smoothquant:
- type: group
- {% if ff2_smoothing %}
- smoothing_vector:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- {% endif %}
- weight:0:
- type: data
- dtype: {{ q_dtype }}
- shape:
- - {{ hidden_size | int }}
- - {{ hidden_size * 4 | int }}
- weight_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- out_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size | int }}
- in_scale:0:
- type: data
- dtype: float32
- shape:
- - {{ hidden_size * 4 | int }}
- ln_f:
- type: group
- beta:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- gamma:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ hidden_size | int }}
- wpe:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ seq_len | int }}
- - {{ hidden_size | int }}
-head_fc:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
-wte:
- type: group
- weight:0:
- type: data
- dtype: {{ dtype }}
- shape:
- - {{ vocab_size | int }}
- - {{ hidden_size | int }}
diff --git a/tests/unit_tests/modules/test_awq.py b/tests/unit_tests/modules/test_awq.py
deleted file mode 100644
index 6123b159..00000000
--- a/tests/unit_tests/modules/test_awq.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-from __future__ import annotations
-
-from typing import Any, Dict
-
-import pytest
-
-from friendli.modules.converter.base import OneOfConverter
-from friendli.modules.converter.utils import get_tensor_from_state_dict
-from friendli.modules.quantizer.maps import get_quantized_converter
-from friendli.modules.quantizer.schema.config import AWQConfig
-
-from tests.unit_tests.modules.conftest import model_name_config_map
-from tests.unit_tests.modules.helpers.utils import (
- AWQModelConfig,
- get_awq_quantized_meta_model,
- get_numpy_data_type,
- get_param_specs,
-)
-
-awq_models = ["gpt_j", "gpt_neox", "llama", "mpt", "mistral"]
-awq_model_name_config_map = {}
-for model_name, model_config in model_name_config_map.items():
- if model_name in awq_models:
- awq_model_name_config_map[model_name] = model_config
-
-
-@pytest.fixture
-def quant_config() -> AWQConfig:
- return AWQConfig()
-
-
-@pytest.fixture
-def render_awq_model_config(
- converter: OneOfConverter, quant_config: AWQConfig
-) -> AWQModelConfig:
- return AWQModelConfig(
- dtype="float16",
- num_decoder_layers=converter.decoder_layer_num,
- hidden_size=converter.decoder_hidden_size,
- num_heads=converter.decoder_num_attention_heads,
- num_kv_heads=converter.decoder_num_kv_attention_heads,
- head_size=converter.decoder_head_size,
- num_encoder_layers=converter.decoder_layer_num, # same as decoder for test
- ff_intermediate_size=converter.decoder_ff_intermediate_size,
- group_size=quant_config.awq_args.quant_group_size,
- q_dtype="int8",
- )
-
-
-@pytest.fixture
-def awq_spec_data(
- model_name: str, render_awq_model_config: AWQModelConfig
-) -> Dict[str, Any]:
- param_specs = get_param_specs(model_name, "awq", render_awq_model_config)
- return param_specs
-
-
-@pytest.mark.parametrize(
- "model_config",
- awq_model_name_config_map.values(),
-)
-def test_convert_info_list_match_hf_state_dict(
- converter: OneOfConverter, quant_config: AWQConfig
-):
- quantizer = get_quantized_converter(quant_config, converter)
- convert_info_list = quantizer.get_convert_info_list()
- assert len(convert_info_list) != 0
- quantized_model = get_awq_quantized_meta_model(
- converter.config, quantizer, quant_config
- )
- state_dict = quantized_model.state_dict()
- for convert_info in convert_info_list:
- param_names = convert_info.param_names
- for param_name in param_names:
- assert param_name in state_dict
-
-
-@pytest.mark.parametrize(
- "model_name, model_config",
- awq_model_name_config_map.items(),
-)
-def test_quantized_model_match_spec(
- converter: OneOfConverter, awq_spec_data: Dict[str, Any], quant_config: AWQConfig
-):
- quantizer = get_quantized_converter(quant_config, converter)
- quantized_model = get_awq_quantized_meta_model(
- converter.config, quantizer, quant_config
- )
- state_dict = quantized_model.state_dict()
- convert_info_list = quantizer.get_convert_info_list()
- for convert_info in convert_info_list:
- converted_name, reshape_fn, param_names, data_type = (
- convert_info.converted_name,
- convert_info.reshape_fn,
- convert_info.param_names,
- convert_info.data_type,
- )
- assert awq_spec_data[converted_name].dtype == get_numpy_data_type(
- data_type
- ), f"data type mismatch for {converted_name}: {param_names}"
- params = [
- get_tensor_from_state_dict(state_dict, param_name)
- for param_name in param_names
- ]
- reshaped_tensor = reshape_fn(params)
- assert (
- awq_spec_data[converted_name].shape == reshaped_tensor.shape
- ), f"shape mismatch for {converted_name}: {param_names}"
diff --git a/tests/unit_tests/modules/test_converter.py b/tests/unit_tests/modules/test_converter.py
deleted file mode 100644
index 3214c533..00000000
--- a/tests/unit_tests/modules/test_converter.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-from __future__ import annotations
-
-from typing import Any, Dict
-
-import pytest
-
-from friendli.modules.converter.base import OneOfConverter
-from friendli.modules.converter.utils import get_tensor_from_state_dict
-
-from tests.unit_tests.modules.conftest import model_name_config_map
-from tests.unit_tests.modules.helpers.utils import get_meta_model, get_numpy_data_type
-
-
-@pytest.mark.parametrize(
- "model_config",
- model_name_config_map.values(),
-)
-def test_convert_info_list_match_hf_state_dict(converter: OneOfConverter):
- convert_info_list = converter.get_convert_info_list()
- assert len(convert_info_list) != 0
- model = get_meta_model(converter.config)
- state_dict = model.state_dict()
- for convert_info in convert_info_list:
- param_names = convert_info.param_names
- for param_name in param_names:
- assert param_name in state_dict
-
-
-@pytest.mark.parametrize(
- "model_name, model_config",
- model_name_config_map.items(),
-)
-def test_convert_info_list_match_spec(
- converter: OneOfConverter, spec_data: Dict[str, Any]
-):
- convert_info_list = converter.get_convert_info_list()
- assert len(convert_info_list) != 0
- converted_param_names = set()
- for convert_info in convert_info_list:
- converted_param_names.add(convert_info.converted_name)
-
- spec_converted_param_names = set(spec_data.keys())
- assert converted_param_names == spec_converted_param_names
-
-
-@pytest.mark.parametrize(
- "model_name, model_config",
- model_name_config_map.items(),
-)
-def test_reshape_fn_match_spec(converter: OneOfConverter, spec_data: Dict[str, Any]):
- convert_info_list = converter.get_convert_info_list()
- model = get_meta_model(converter.config)
- state_dict = model.state_dict()
- for convert_info in convert_info_list:
- converted_name, reshape_fn, param_names, data_type = (
- convert_info.converted_name,
- convert_info.reshape_fn,
- convert_info.param_names,
- convert_info.data_type,
- )
- assert spec_data[converted_name].dtype == get_numpy_data_type(
- data_type
- ), f"data type mismatch for {converted_name}: {param_names}"
- params = [
- get_tensor_from_state_dict(state_dict, param_name)
- for param_name in param_names
- ]
- reshaped_tensor = reshape_fn(params)
- assert (
- spec_data[converted_name].shape == reshaped_tensor.shape
- ), f"shape mismatch for {converted_name}: {param_names}"
diff --git a/tests/unit_tests/modules/test_lora_adapter_converter.py b/tests/unit_tests/modules/test_lora_adapter_converter.py
deleted file mode 100644
index e1626d09..00000000
--- a/tests/unit_tests/modules/test_lora_adapter_converter.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-from __future__ import annotations
-
-from typing import Any, Dict, cast
-
-import pytest
-from peft import LoraConfig
-
-from friendli.modules.converter.base import DecoderOnlyConverter, OneOfConverter
-from friendli.modules.converter.maps import get_adapter_converter_factory
-from friendli.modules.converter.utils import get_model_arch, get_tensor_from_state_dict
-
-from tests.unit_tests.modules.conftest import model_name_config_map
-from tests.unit_tests.modules.helpers.utils import (
- LoraAdapterConfig,
- get_meta_model_with_adapter,
- get_numpy_data_type,
- get_param_specs,
-)
-
-model_with_adapter = ["mpt", "llama"]
-model_with_adapter_name_config_map = {}
-for model_name, model_config in model_name_config_map.items():
-    if model_name in model_with_adapter:
-        model_with_adapter_name_config_map[model_name] = model_config
-
-
-@pytest.fixture
-def adapter_config(converter: OneOfConverter) -> LoraConfig:
- model_type = cast(DecoderOnlyConverter, converter).config.model_type
- if model_type == "mpt":
- return LoraConfig(target_modules=["Wqkv"])
- elif model_type == "llama":
- return LoraConfig(
- target_modules=["q_proj", "k_proj", "o_proj", "up_proj", "down_proj"]
- )
- return LoraConfig()
-
-
-@pytest.fixture
-def render_lora_adapter_config(
- converter: OneOfConverter, adapter_config: LoraConfig
-) -> LoraAdapterConfig:
- return LoraAdapterConfig(
- dtype="float16",
- num_decoder_layers=converter.decoder_layer_num,
- hidden_size=converter.decoder_hidden_size,
- num_heads=converter.decoder_num_attention_heads,
- num_kv_heads=converter.decoder_num_kv_attention_heads,
- head_size=converter.decoder_head_size,
- num_encoder_layers=converter.decoder_layer_num, # same as decoder for test
- ff_intermediate_size=converter.decoder_ff_intermediate_size,
- lora_rank_dim=adapter_config.r,
- )
-
-
-@pytest.fixture
-def lora_spec_data(
- model_name: str, render_lora_adapter_config: LoraAdapterConfig
-) -> Dict[str, Any]:
- param_specs = get_param_specs(model_name, "lora", render_lora_adapter_config)
- return param_specs
-
-
-@pytest.mark.parametrize(
- "model_config",
-    model_with_adapter_name_config_map.values(),
-)
-def test_convert_info_list_match_hf_state_dict(
- converter: OneOfConverter,
- adapter_config: LoraConfig,
-):
- model_arch = get_model_arch(converter.config)
- adapter_converter_cls = get_adapter_converter_factory(model_arch)
- adapter_converter = adapter_converter_cls(converter, adapter_config)
-
- convert_info_list = adapter_converter.get_convert_info_list()
- model_with_adapter = get_meta_model_with_adapter(
- adapter_converter.converter.config, adapter_converter.adapter_config
- )
- state_dict = model_with_adapter.state_dict()
- for convert_info in convert_info_list:
- param_names = convert_info.param_names
- for param_name in param_names:
- assert param_name in state_dict
-
-
-@pytest.mark.parametrize(
- "model_name, model_config",
-    model_with_adapter_name_config_map.items(),
-)
-def test_model_with_lora_match_spec(
- converter: OneOfConverter,
- lora_spec_data: Dict[str, Any],
- adapter_config: LoraConfig,
-):
- model_arch = get_model_arch(converter.config)
- adapter_converter_cls = get_adapter_converter_factory(model_arch)
- adapter_converter = adapter_converter_cls(converter, adapter_config)
-
- convert_info_list = adapter_converter.get_convert_info_list()
- model_with_adapter = get_meta_model_with_adapter(
- adapter_converter.converter.config, adapter_converter.adapter_config
- )
- state_dict = model_with_adapter.state_dict()
- for convert_info in convert_info_list:
- converted_name, reshape_fn, param_names, data_type = (
- convert_info.converted_name,
- convert_info.reshape_fn,
- convert_info.param_names,
- convert_info.data_type,
- )
- assert lora_spec_data[converted_name].dtype == get_numpy_data_type(
- data_type
- ), f"data type mismatch for {converted_name}: {param_names}"
- params = [
- get_tensor_from_state_dict(state_dict, param_name)
- for param_name in param_names
- ]
- reshaped_tensor = reshape_fn(params)
- assert (
- lora_spec_data[converted_name].shape == reshaped_tensor.shape
- ), f"shape mismatch for {converted_name}: {param_names}"
diff --git a/tests/unit_tests/modules/test_smoothquant.py b/tests/unit_tests/modules/test_smoothquant.py
deleted file mode 100644
index 06904bc5..00000000
--- a/tests/unit_tests/modules/test_smoothquant.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
-
-# # Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-
-# from __future__ import annotations
-
-# from typing import Any, Dict
-
-# import pytest
-
-# from friendli.modules.converter.base import OneOfConverter
-# from friendli.modules.converter.utils import get_tensor_from_state_dict
-# from friendli.modules.quantizer.maps import get_quantized_converter
-# from friendli.modules.quantizer.schema.config import SmoothQuantArgs, SmoothQuantConfig
-
-# from tests.unit_tests.modules.conftest import model_name_config_map
-# from tests.unit_tests.modules.helpers.utils import (
-# SmoothQuantModelConfig,
-# get_numpy_data_type,
-# get_param_specs,
-# get_smoothquant_quantized_meta_model,
-# )
-
-# smoothquant_models = [
-# "bloom",
-# "codegen",
-# "falcon",
-# "falcon_7b",
-# "gpt_j",
-# "gpt_neox",
-# "llama",
-# "mpt",
-# "opt",
-# ]
-# smoothquant_model_name_config_map = {}
-# for model_name, model_config in model_name_config_map.items():
-# if model_name in smoothquant_models:
-# smoothquant_model_name_config_map[model_name] = model_config
-
-
-# @pytest.fixture
-# def quant_config() -> SmoothQuantConfig:
-# return SmoothQuantConfig(
-# smoothquant_args=SmoothQuantArgs(
-# attn_fc_smoothing=True,
-# ff2_smoothing=True,
-# )
-# )
-
-
-# @pytest.fixture
-# def render_smoothquant_model_config(
-# converter: OneOfConverter, quant_config: SmoothQuantConfig
-# ) -> SmoothQuantModelConfig:
-# return SmoothQuantModelConfig(
-# dtype="float16",
-# num_decoder_layers=converter.decoder_layer_num,
-# hidden_size=converter.decoder_hidden_size,
-# num_heads=converter.decoder_num_attention_heads,
-# num_kv_heads=converter.decoder_num_kv_attention_heads,
-# head_size=converter.decoder_head_size,
-# num_encoder_layers=converter.decoder_layer_num, # same as decoder for test
-# ff_intermediate_size=converter.decoder_ff_intermediate_size,
-# attn_fc_smoothing=quant_config.smoothquant_args.attn_fc_smoothing,
-# ff2_smoothing=quant_config.smoothquant_args.ff2_smoothing,
-# q_dtype="int8",
-# )
-
-
-# @pytest.fixture
-# def smoothquant_spec_data(
-# model_name: str, render_smoothquant_model_config: SmoothQuantModelConfig
-# ) -> Dict[str, Any]:
-# param_specs = get_param_specs(
-# model_name, "smoothquant", render_smoothquant_model_config
-# )
-# return param_specs
-
-
-# @pytest.mark.parametrize(
-# "model_config",
-# smoothquant_model_name_config_map.values(),
-# )
-# def test_convert_info_list_match_hf_state_dict(
-# converter: OneOfConverter, quant_config: SmoothQuantConfig
-# ):
-# quantizer = get_quantized_converter(quant_config, converter)
-# convert_info_list = quantizer.get_convert_info_list()
-# assert len(convert_info_list) != 0
-# quantized_model = get_smoothquant_quantized_meta_model(converter.config, quantizer)
-# state_dict = quantized_model.state_dict()
-# for convert_info in convert_info_list:
-# param_names = convert_info.param_names
-# for param_name in param_names:
-# assert param_name in state_dict
-
-
-# @pytest.mark.parametrize(
-# "model_name, model_config",
-# smoothquant_model_name_config_map.items(),
-# )
-# def test_quantized_model_match_spec(
-# converter: OneOfConverter,
-# smoothquant_spec_data: Dict[str, Any],
-# quant_config: SmoothQuantConfig,
-# ):
-# quantizer = get_quantized_converter(quant_config, converter)
-# quantized_model = get_smoothquant_quantized_meta_model(converter.config, quantizer)
-# state_dict = quantized_model.state_dict()
-# convert_info_list = quantizer.get_convert_info_list()
-# for convert_info in convert_info_list:
-# converted_name, reshape_fn, param_names, data_type = (
-# convert_info.converted_name,
-# convert_info.reshape_fn,
-# convert_info.param_names,
-# convert_info.data_type,
-# )
-# assert smoothquant_spec_data[converted_name].dtype == get_numpy_data_type(
-# data_type
-# ), f"data type mismatch for {converted_name}: {param_names}"
-# params = [
-# get_tensor_from_state_dict(state_dict, param_name)
-# for param_name in param_names
-# ]
-# reshaped_tensor = reshape_fn(params)
-# assert (
-# smoothquant_spec_data[converted_name].shape == reshaped_tensor.shape
-# ), f"shape mismatch for {converted_name}: {param_names}"