Commit 0d8f5f9
Update minor version v1.5.0
Co-authored-by: Siyoon Lee <[email protected]>
2 people authored and kooyunmo committed Aug 6, 2024
1 parent 25a0d6d commit 0d8f5f9
Showing 127 changed files with 77 additions and 22,449 deletions.
6 changes: 5 additions & 1 deletion README.md
@@ -3,7 +3,11 @@ Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-->

<p align="center">
<img src="https://docs.friendli.ai/img/logo.svg" width="30%" alt="Friendli Logo">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://docs.friendli.ai/img/logo_dark.svg">
<source media="(prefers-color-scheme: light)" srcset="https://docs.friendli.ai/img/logo.svg">
<img width="30%" alt="Friendli Logo" src="https://docs.friendli.ai/img/logo.svg">
</picture>
</p>

<h2><p align="center">Supercharge Generative AI Serving with Friendli 🚀</p></h2>
362 changes: 1 addition & 361 deletions friendli/cli/model.py
@@ -6,25 +6,12 @@

from __future__ import annotations

import os
from typing import Optional, cast

import typer
import yaml

from friendli.enums import CheckpointFileType, ModelDataType
from friendli.errors import (
CheckpointConversionError,
InvalidConfigError,
NotFoundError,
NotSupportedQuantConfigError,
QuantizationError,
)
from friendli.formatter import TableFormatter
from friendli.sdk.client import Friendli
from friendli.utils.compat import model_dump, model_parse
from friendli.utils.compat import model_dump
from friendli.utils.decorator import check_api
from friendli.utils.format import secho_error_and_exit

app = typer.Typer(
no_args_is_help=True,
@@ -53,350 +40,3 @@ def list_models():
models = client.model.list()
models_ = [model_dump(model) for model in iter(models)]
table_formatter.render(models_)


@app.command()
def convert(
model_name_or_path: str = typer.Option(
...,
"--model-name-or-path",
"-m",
help="Hugging Face pretrained model name or path to the saved model checkpoint.",
),
output_dir: str = typer.Option(
...,
"--output-dir",
"-o",
help=(
"Directory path to save the converted checkpoint and related configuration "
"files. Three files will be created in the directory: `model.h5`, "
"`tokenizer.json`, and `attr.yaml`. "
"The `model.h5` or `model.safetensors` is the converted checkpoint and can be renamed using "
"the `--output-model-filename` option. "
"The `tokenizer.json` is the Friendli-compatible tokenizer file, which should "
"be uploaded along with the checkpoint file to tokenize the model input "
"and output. "
"The `attr.yaml` is the checkpoint attribute file, to be used when uploading "
"the converted model to Friendli. You can designate the file name using "
"the `--output-attr-filename` option."
),
),
data_type: ModelDataType = typer.Option(
None, "--data-type", "-dt", help="The data type of converted checkpoint."
),
cache_dir: Optional[str] = typer.Option(
None, "--cache-dir", help="Directory for downloading checkpoint."
),
dry_run: bool = typer.Option(
False, "--dry-run", help="Only check conversion avaliability."
),
output_model_file_name: str = typer.Option(
None,
"--output-model-filename",
help="Name of the converted checkpoint file."
"The default file name is `model.h5` when `--output-ckpt-file-type` is `hdf5` or `model.safetensors` when `--output-ckpt-file-type` is `safetensors`.",
),
output_ckpt_file_type: CheckpointFileType = typer.Option(
CheckpointFileType.SAFETENSORS,
"--output-ckpt-file-type",
help="File format of the converted checkpoint file. The default output ckpt file type is `safetensors`.",
),
output_attr_file_name: str = typer.Option(
"attr.yaml",
"--output-attr-filename",
help="Name of the checkpoint attribute file.",
),
quantize: bool = typer.Option(
False,
"--quantize",
help="Quantize the model before conversion",
),
quant_config_file: Optional[typer.FileText] = typer.Option(
None,
"--quant-config-file",
help="Path to the quantization configuration file.",
),
):
"""Convert huggingface's model checkpoint to Friendli format.
When a checkpoint is in the Hugging Face format, it cannot be directly served.
It requires conversion to the Friendli format for serving. The conversion
process involves copying the original checkpoint and transforming it into a
checkpoint in the Friendli format (*.h5).
:::caution
The `friendli model convert` is available only when the package is installed with
`pip install "friendli-client[mllib]"`.
:::
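For reference, a minimal sketch of the call this command makes under the hood,
assuming the package is installed with the `mllib` extra; the model name and
paths below are illustrative, not defaults:

```python
# A hedged sketch of a direct conversion call; it mirrors the CLI's own
# invocation of convert_checkpoint. Model name and paths are hypothetical.
from friendli.enums import CheckpointFileType
from friendli.modules.converter.convert import convert_checkpoint

convert_checkpoint(
    model_name_or_path="meta-llama/Llama-2-7b-hf",  # hypothetical model
    output_model_file_name="model.safetensors",
    output_ckpt_file_type=CheckpointFileType.SAFETENSORS,
    output_attr_file_name="attr.yaml",
    output_dir="./converted",
    data_type=None,  # or a friendli.enums.ModelDataType member
    cache_dir=None,
    dry_run=True,  # first check that conversion is possible
    quantize=False,
    quant_config=None,
)
```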
### Apply quantization
If you want to quantize the model along with the conversion, the `--quantize` option
should be provided. You can customize the quantization configuration by describing
it in a YAML file and passing its path to the `--quant-config-file`
option. When the `--quantize` option is used without `--quant-config-file`,
the following configuration is used by default.
```yaml
# Default quantization configuration
mode: awq
device: cuda:0
seed: 42
offload: true
calibration_dataset:
path_or_name: lambada
format: json
split: validation
lookup_column_name: text
num_samples: 128
max_length: 512
batch_size: 1
awq_args:
quant_bit: 4
quant_group_size: 64
```
- **`mode`**: Quantization scheme to apply. Defaults to "awq".
- **`device`**: Device to run the quantization process. Defaults to "cuda:0".
- **`seed`**: Random seed. Defaults to 42.
- **`offload`**: When enabled, this option significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to true.
- **`calibration_dataset`**
- **`path_or_name`**: Path or name of the dataset. Datasets from either the Hugging Face Datasets Hub or local file system can be used. Defaults to "lambada".
- **`format`**: Format of datasets. Defaults to "json".
- **`split`**: Which split of the data to load. Defaults to "validation".
- **`lookup_column_name`**: The name of a column in the dataset to be used as calibration inputs. Defaults to "text".
- **`num_samples`**: The number of dataset samples to use for calibration. Note that the dataset will be shuffled before sampling. Defaults to 128.
- **`max_length`**: The maximum length of a calibration input sequence. Defaults to 512.
- **`batch_size`**: The number of samples to process in a single batch. Defaults to 1.
- **`awq_args`** (Fill in this field only for "awq" mode)
- **`quant_bit`** : Bit width of integers to represent weights. Possible values are `4` or `8`. Defaults to 4.
- **`quant_group_size`**: Group size of quantized matrices. 64 is the only supported value at this time. Defaults to 64.
:::tip
If you encounter OOM issues when running with AWQ, try enabling the `offload` option.
:::
:::tip
If you set `percentile` to 100 in the quant config file,
the quantization range is determined by the maximum absolute values of the activation tensors.
:::
:::info
Currently, [AWQ](https://arxiv.org/abs/2306.00978) is the only supported quantization scheme.
:::
:::info
AWQ is supported only for models with architecture listed as follows:
- `GPTNeoXForCausalLM`
- `GPTJForCausalLM`
- `LlamaForCausalLM`
- `MPTForCausalLM`
:::
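If you need a custom configuration, one option is to generate the YAML file
programmatically. A minimal sketch, assuming the schema shown above (the values
are illustrative):

```python
# A hedged sketch: write a custom AWQ quantization config for use with
# --quant-config-file. Field names mirror the default configuration above.
import yaml

quant_config = {
    "mode": "awq",
    "device": "cuda:0",
    "seed": 42,
    "offload": True,  # offload layers to CPU RAM to reduce GPU memory usage
    "calibration_dataset": {
        "path_or_name": "lambada",
        "format": "json",
        "split": "validation",
        "lookup_column_name": "text",
        "num_samples": 128,
        "max_length": 512,
        "batch_size": 1,
    },
    "awq_args": {
        "quant_bit": 4,  # 4 or 8
        "quant_group_size": 64,  # 64 is the only supported value at this time
    },
}

with open("quant_config.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(quant_config, f)
```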
"""
# pylint: disable=too-many-branches
try:
# pylint: disable=import-outside-toplevel
from friendli.modules.converter.convert import convert_checkpoint
from friendli.modules.quantizer.schema.config import (
AWQConfig,
OneOfQuantConfig,
QuantConfig,
)
from friendli.modules.quantizer_v2.quantize import quantize_checkpoint
from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig

# pylint: enable=import-outside-toplevel
except ModuleNotFoundError as exc:
secho_error_and_exit(str(exc))

if not os.path.isdir(output_dir):
if os.path.exists(output_dir):
secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.")
os.mkdir(output_dir)

quant_config: Optional[OneOfQuantConfig] = None
use_quantizer_v2 = False
if quantize:
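# Load the user-provided YAML config if given; otherwise fall back to AWQ defaults.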
if quant_config_file:
try:
quant_config_dict = cast(dict, yaml.safe_load(quant_config_file.read()))
except yaml.YAMLError as err:
secho_error_and_exit(f"Failed to load the quant config file: {err}")
if quant_config_dict["mode"] == "int8":
quant_config = model_parse( # type: ignore
Int8QuantConfig, quant_config_dict
)
else:
quant_config = model_parse(
QuantConfig, {"config": quant_config_dict}
).config

# TODO(SA): All quantization modes will be migrated to V2. Remove this after migration.
else:
quant_config = AWQConfig()

if isinstance(quant_config, Int8QuantConfig):
use_quantizer_v2 = True

default_names = {
CheckpointFileType.HDF5: "model.h5",
CheckpointFileType.SAFETENSORS: "model.safetensors",
}
output_model_file_name = (
output_model_file_name or default_names[output_ckpt_file_type]
)

if use_quantizer_v2:
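# int8 quantization is handled by the v2 quantizer; other modes use the legacy converter path below.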
if output_ckpt_file_type == CheckpointFileType.HDF5:
secho_error_and_exit(
f"int8 quantization only supports `safetensors` output_ckpt_file_type. Current output_ckpt_file_type: {output_ckpt_file_type}"
)
try:
assert isinstance(quant_config, Int8QuantConfig)
quantize_checkpoint(
model_name_or_path=model_name_or_path,
output_dir=output_dir,
cache_dir=cache_dir,
dry_run=dry_run,
quant_config=quant_config,
)
except (NotFoundError, QuantizationError, NotSupportedQuantConfigError) as exc:
secho_error_and_exit(str(exc))
else:
try:
convert_checkpoint(
model_name_or_path=model_name_or_path,
output_model_file_name=output_model_file_name,
output_ckpt_file_type=output_ckpt_file_type,
output_attr_file_name=output_attr_file_name,
output_dir=output_dir,
data_type=data_type,
cache_dir=cache_dir,
dry_run=dry_run,
quantize=quantize,
quant_config=quant_config,
)
except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc:
secho_error_and_exit(str(exc))

msg = (
f"Checkpoint({model_name_or_path}) can be converted."
if dry_run
else f"Checkpoint({model_name_or_path}) has been converted successfully."
)
typer.secho(msg)


@app.command()
def convert_adapter(
adapter_name_or_path: str = typer.Option(
...,
"--adapter-name-or-path",
"-a",
help="Hugging Face pretrained adapter name or path to the saved adapter checkpoint.",
),
output_dir: str = typer.Option(
...,
"--output-dir",
"-o",
help=(
"Directory path to save the converted adapter checkpoint and related configuration "
"files. Two files will be created in the directory: `adapter.h5`, "
"and `attr.yaml`. "
"The `adapter.h5` is the converted checkpoint and can be renamed using "
"the `--output-adapter-filename` option. "
"The `attr.yaml` is the adapter checkpoint attribute file, to be used when uploading "
"the converted model to Friendli. You can designate the file name using "
"the `--output-attr-filename` option."
),
),
data_type: ModelDataType = typer.Option(
None, "--data-type", "-dt", help="The data type of converted checkpoint."
),
base_model_name_or_path: Optional[str] = typer.Option(
None,
"--base-model-name-or-path",
"-b",
help=(
"Hugging Face model name or path to the saved backbone checkpoint. "
"By default, we use the `base_model_name_or_path` in adapter_config.json."
),
),
cache_dir: Optional[str] = typer.Option(
None, "--cache-dir", help="Directory for downloading checkpoint."
),
dry_run: bool = typer.Option(
False, "--dry-run", help="Only check conversion avaliability."
),
output_adapter_filename: str = typer.Option(
"adapter.h5",
"--output-adapter-filename",
help="Name of the converted adapter checkpoint file.",
),
output_attr_filename: str = typer.Option(
"adapter_attr.yaml",
"--output-attr-filename",
help="Name of the adapter checkpoint attribute file.",
),
) -> None:
"""Convert huggingface's adapter checkpoint to Friendli format.
When an adapter checkpoint is in the Hugging Face PEFT format, it cannot
be directly served in Friendli. It requires conversion to the Friendli format.
The conversion process involves copying the original adapter checkpoint and
transforming it into a checkpoint in the Friendli format (*.h5).
This function does not include the `friendli model convert` command. i.e.
`friendli model convert-adapter` only converts adapter's parameters, not backbone's.
:::caution
`friendli model convert-adapter` is available only when the package is installed with
`pip install "friendli-client[mllib]"`.
:::
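For reference, a minimal sketch of the call this command makes under the hood,
assuming the package is installed with the `mllib` extra; the adapter name and
paths below are illustrative:

```python
# A hedged sketch of a direct adapter conversion call; it mirrors the CLI's own
# invocation of convert_adapter_checkpoint. Names and paths are hypothetical.
from friendli.enums import CheckpointFileType
from friendli.modules.converter.convert import convert_adapter_checkpoint

convert_adapter_checkpoint(
    adapter_name_or_path="my-org/my-lora-adapter",  # hypothetical adapter
    output_attr_filename="adapter_attr.yaml",
    output_dir="./converted-adapter",
    output_adapter_filename="adapter.h5",
    base_model_name_or_path=None,  # defaults to the value in adapter_config.json
    data_type=None,
    output_adapter_file_type=CheckpointFileType.HDF5,  # Safetensors LoRA not supported yet
    cache_dir=None,
    dry_run=True,  # only check that conversion is possible
)
```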
"""
try:
from friendli.modules.converter.convert import ( # pylint: disable=import-outside-toplevel
convert_adapter_checkpoint,
)
except ModuleNotFoundError as exc:
secho_error_and_exit(str(exc))

if not os.path.isdir(output_dir):
if os.path.exists(output_dir):
secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.")
os.mkdir(output_dir)

# The engine cannot load a Safetensors LoRA checkpoint yet.
output_adapter_file_type = CheckpointFileType.HDF5
default_names = {
CheckpointFileType.HDF5: "adapter.h5",
CheckpointFileType.SAFETENSORS: "adapter.safetensors",
}
output_adapter_filename = (
output_adapter_filename or default_names[output_adapter_file_type]
)

try:
convert_adapter_checkpoint(
adapter_name_or_path=adapter_name_or_path,
output_attr_filename=output_attr_filename,
output_dir=output_dir,
output_adapter_filename=output_adapter_filename,
base_model_name_or_path=base_model_name_or_path,
data_type=data_type,
output_adapter_file_type=output_adapter_file_type,
cache_dir=cache_dir,
dry_run=dry_run,
)
except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc:
secho_error_and_exit(str(exc))

msg = (
f"Checkpoint({adapter_name_or_path}) can be converted."
if dry_run
else f"Checkpoint({adapter_name_or_path}) has been converted successfully."
)
typer.secho(msg)
3 changes: 0 additions & 3 deletions friendli/modules/__init__.py

This file was deleted.

3 changes: 0 additions & 3 deletions friendli/modules/converter/__init__.py

This file was deleted.
