Commit 0d8f5f9
Update minor version v1.5.0
Co-authored-by: Siyoon Lee <[email protected]>
2 people authored and kooyunmo committed Aug 6, 2024
1 parent 25a0d6d commit 0d8f5f9
Showing 127 changed files with 77 additions and 22,449 deletions.
6 changes: 5 additions & 1 deletion README.md
@@ -3,7 +3,11 @@ Copyright (c) 2022-present, FriendliAI Inc. All rights reserved.
-->

<p align="center">
<img src="https://docs.friendli.ai/img/logo.svg" width="30%" alt="Friendli Logo">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://docs.friendli.ai/img/logo_dark.svg">
<source media="(prefers-color-scheme: light)" srcset="https://docs.friendli.ai/img/logo.svg">
<img width="30%" alt="Friendli Logo" src="https://docs.friendli.ai/img/logo.svg">
</picture>
</p>

<h2><p align="center">Supercharge Generative AI Serving with Friendli 🚀</p></h2>
362 changes: 1 addition & 361 deletions friendli/cli/model.py
@@ -6,25 +6,12 @@

from __future__ import annotations

import os
from typing import Optional, cast

import typer
import yaml

from friendli.enums import CheckpointFileType, ModelDataType
from friendli.errors import (
CheckpointConversionError,
InvalidConfigError,
NotFoundError,
NotSupportedQuantConfigError,
QuantizationError,
)
from friendli.formatter import TableFormatter
from friendli.sdk.client import Friendli
from friendli.utils.compat import model_dump, model_parse
from friendli.utils.compat import model_dump
from friendli.utils.decorator import check_api
from friendli.utils.format import secho_error_and_exit

app = typer.Typer(
no_args_is_help=True,
@@ -53,350 +40,3 @@ def list_models():
models = client.model.list()
models_ = [model_dump(model) for model in iter(models)]
table_formatter.render(models_)


@app.command()
def convert(
model_name_or_path: str = typer.Option(
...,
"--model-name-or-path",
"-m",
help="Hugging Face pretrained model name or path to the saved model checkpoint.",
),
output_dir: str = typer.Option(
...,
"--output-dir",
"-o",
help=(
"Directory path to save the converted checkpoint and related configuration "
"files. Three files will be created in the directory: `model.h5`, "
"`tokenizer.json`, and `attr.yaml`. "
"The `model.h5` or `model.safetensors` is the converted checkpoint and can be renamed using "
"the `--output-model-filename` option. "
"The `tokenizer.json` is the Friendli-compatible tokenizer file, which should "
"be uploaded along with the checkpoint file to tokenize the model input "
"and output. "
"The `attr.yaml` is the checkpoint attribute file, to be used when uploading "
"the converted model to Friendli. You can designate the file name using "
"the `--output-attr-filename` option."
),
),
data_type: ModelDataType = typer.Option(
None, "--data-type", "-dt", help="The data type of converted checkpoint."
),
cache_dir: Optional[str] = typer.Option(
None, "--cache-dir", help="Directory for downloading checkpoint."
),
dry_run: bool = typer.Option(
False, "--dry-run", help="Only check conversion avaliability."
),
output_model_file_name: str = typer.Option(
None,
"--output-model-filename",
help="Name of the converted checkpoint file."
"The default file name is `model.h5` when `--output-ckpt-file-type` is `hdf5` or `model.safetensors` when `--output-ckpt-file-type` is `safetensors`.",
),
output_ckpt_file_type: CheckpointFileType = typer.Option(
CheckpointFileType.SAFETENSORS,
"--output-ckpt-file-type",
help="File format of the converted checkpoint file. The default output ckpt file type is `safetensors`.",
),
output_attr_file_name: str = typer.Option(
"attr.yaml",
"--output-attr-filename",
help="Name of the checkpoint attribute file.",
),
quantize: bool = typer.Option(
False,
"--quantize",
help="Quantize the model before conversion",
),
quant_config_file: Optional[typer.FileText] = typer.Option(
None,
"--quant-config-file",
help="Path to the quantization configuration file.",
),
):
"""Convert huggingface's model checkpoint to Friendli format.
When a checkpoint is in the Hugging Face format, it cannot be directly served.
It requires conversion to the Friendli format for serving. The conversion
process involves copying the original checkpoint and transforming it into a
checkpoint in the Friendli format (*.h5).
:::caution
The `friendli model convert` is available only when the package is installed with
`pip install "friendli-client[mllib]"`.
:::
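For reference, a minimal sketch of the call this command makes under the hood,
assuming the package is installed with the `mllib` extra; the model name and
paths below are illustrative, not defaults:

```python
# A hedged sketch of a direct conversion call; it mirrors the CLI's own
# invocation of convert_checkpoint. Model name and paths are hypothetical.
from friendli.enums import CheckpointFileType
from friendli.modules.converter.convert import convert_checkpoint

convert_checkpoint(
    model_name_or_path="meta-llama/Llama-2-7b-hf",  # hypothetical model
    output_model_file_name="model.safetensors",
    output_ckpt_file_type=CheckpointFileType.SAFETENSORS,
    output_attr_file_name="attr.yaml",
    output_dir="./converted",
    data_type=None,  # or a friendli.enums.ModelDataType member
    cache_dir=None,
    dry_run=True,  # first check that conversion is possible
    quantize=False,
    quant_config=None,
)
```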
### Apply quantization
If you want to quantize the model along with the conversion, the `--quantize` option
should be provided. You can customize the quantization configuration by describing
it in a YAML file and passing its path to the `--quant-config-file`
option. When the `--quantize` option is used without `--quant-config-file`,
the following configuration is used by default.
```yaml
# Default quantization configuration
mode: awq
device: cuda:0
seed: 42
offload: true
calibration_dataset:
path_or_name: lambada
format: json
split: validation
lookup_column_name: text
num_samples: 128
max_length: 512
batch_size: 1
awq_args:
quant_bit: 4
quant_group_size: 64
```
- **`mode`**: Quantization scheme to apply. Defaults to "awq".
- **`device`**: Device to run the quantization process. Defaults to "cuda:0".
- **`seed`**: Random seed. Defaults to 42.
- **`offload`**: When enabled, this option significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to true.
- **`calibration_dataset`**
- **`path_or_name`**: Path or name of the dataset. Datasets from either the Hugging Face Datasets Hub or local file system can be used. Defaults to "lambada".
- **`format`**: Format of datasets. Defaults to "json".
- **`split`**: Which split of the data to load. Defaults to "validation".
- **`lookup_column_name`**: The name of a column in the dataset to be used as calibration inputs. Defaults to "text".
- **`num_samples`**: The number of dataset samples to use for calibration. Note that the dataset will be shuffled before sampling. Defaults to 128.
- **`max_length`**: The maximum length of a calibration input sequence. Defaults to 512.
- **`batch_size`**: The number of samples to process in a single batch. Defaults to 1.
- **`awq_args`** (Fill in this field only for "awq" mode)
- **`quant_bit`** : Bit width of integers to represent weights. Possible values are `4` or `8`. Defaults to 4.
- **`quant_group_size`**: Group size of quantized matrices. 64 is the only supported value at this time. Defaults to 64.
:::tip
If you encounter OOM issues when running with AWQ, try enabling the `offload` option.
:::
:::tip
If you set `percentile` to 100 in the quant config file,
the quantization range is determined by the maximum absolute values of the activation tensors.
:::
:::info
Currently, [AWQ](https://arxiv.org/abs/2306.00978) is the only supported quantization scheme.
:::
:::info
AWQ is supported only for models with architecture listed as follows:
- `GPTNeoXForCausalLM`
- `GPTJForCausalLM`
- `LlamaForCausalLM`
- `MPTForCausalLM`
:::
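If you need a custom configuration, one option is to generate the YAML file
programmatically. A minimal sketch, assuming the schema shown above (the values
are illustrative):

```python
# A hedged sketch: write a custom AWQ quantization config for use with
# --quant-config-file. Field names mirror the default configuration above.
import yaml

quant_config = {
    "mode": "awq",
    "device": "cuda:0",
    "seed": 42,
    "offload": True,  # offload layers to CPU RAM to reduce GPU memory usage
    "calibration_dataset": {
        "path_or_name": "lambada",
        "format": "json",
        "split": "validation",
        "lookup_column_name": "text",
        "num_samples": 128,
        "max_length": 512,
        "batch_size": 1,
    },
    "awq_args": {
        "quant_bit": 4,  # 4 or 8
        "quant_group_size": 64,  # 64 is the only supported value at this time
    },
}

with open("quant_config.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(quant_config, f)
```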
"""
# pylint: disable=too-many-branches
try:
# pylint: disable=import-outside-toplevel
from friendli.modules.converter.convert import convert_checkpoint
from friendli.modules.quantizer.schema.config import (
AWQConfig,
OneOfQuantConfig,
QuantConfig,
)
from friendli.modules.quantizer_v2.quantize import quantize_checkpoint
from friendli.modules.quantizer_v2.schema.config import Int8QuantConfig

# pylint: enable=import-outside-toplevel
except ModuleNotFoundError as exc:
secho_error_and_exit(str(exc))

if not os.path.isdir(output_dir):
if os.path.exists(output_dir):
secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.")
os.mkdir(output_dir)

quant_config: Optional[OneOfQuantConfig] = None
use_quantizer_v2 = False
if quantize:
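# Load the user-provided YAML config if given; otherwise fall back to AWQ defaults.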
if quant_config_file:
try:
quant_config_dict = cast(dict, yaml.safe_load(quant_config_file.read()))
except yaml.YAMLError as err:
secho_error_and_exit(f"Failed to load the quant config file: {err}")
if quant_config_dict["mode"] == "int8":
quant_config = model_parse( # type: ignore
Int8QuantConfig, quant_config_dict
)
else:
quant_config = model_parse(
QuantConfig, {"config": quant_config_dict}
).config

# TODO(SA): All quantization modes will be migrated to V2. Remove this after migration.
else:
quant_config = AWQConfig()

if isinstance(quant_config, Int8QuantConfig):
use_quantizer_v2 = True

default_names = {
CheckpointFileType.HDF5: "model.h5",
CheckpointFileType.SAFETENSORS: "model.safetensors",
}
output_model_file_name = (
output_model_file_name or default_names[output_ckpt_file_type]
)

if use_quantizer_v2:
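# int8 quantization is handled by the v2 quantizer; other modes use the legacy converter path below.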
if output_ckpt_file_type == CheckpointFileType.HDF5:
secho_error_and_exit(
f"int8 quantization only supports `safetensors` output_ckpt_file_type. Current output_ckpt_file_type: {output_ckpt_file_type}"
)
try:
assert isinstance(quant_config, Int8QuantConfig)
quantize_checkpoint(
model_name_or_path=model_name_or_path,
output_dir=output_dir,
cache_dir=cache_dir,
dry_run=dry_run,
quant_config=quant_config,
)
except (NotFoundError, QuantizationError, NotSupportedQuantConfigError) as exc:
secho_error_and_exit(str(exc))
else:
try:
convert_checkpoint(
model_name_or_path=model_name_or_path,
output_model_file_name=output_model_file_name,
output_ckpt_file_type=output_ckpt_file_type,
output_attr_file_name=output_attr_file_name,
output_dir=output_dir,
data_type=data_type,
cache_dir=cache_dir,
dry_run=dry_run,
quantize=quantize,
quant_config=quant_config,
)
except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc:
secho_error_and_exit(str(exc))

msg = (
f"Checkpoint({model_name_or_path}) can be converted."
if dry_run
else f"Checkpoint({model_name_or_path}) has been converted successfully."
)
typer.secho(msg)


@app.command()
def convert_adapter(
adapter_name_or_path: str = typer.Option(
...,
"--adapter-name-or-path",
"-a",
help="Hugging Face pretrained adapter name or path to the saved adapter checkpoint.",
),
output_dir: str = typer.Option(
...,
"--output-dir",
"-o",
help=(
"Directory path to save the converted adapter checkpoint and related configuration "
"files. Two files will be created in the directory: `adapter.h5`, "
"and `attr.yaml`. "
"The `adapter.h5` is the converted checkpoint and can be renamed using "
"the `--output-adapter-filename` option. "
"The `attr.yaml` is the adapter checkpoint attribute file, to be used when uploading "
"the converted model to Friendli. You can designate the file name using "
"the `--output-attr-filename` option."
),
),
data_type: ModelDataType = typer.Option(
None, "--data-type", "-dt", help="The data type of converted checkpoint."
),
base_model_name_or_path: Optional[str] = typer.Option(
None,
"--base-model-name-or-path",
"-b",
help=(
"Hugging Face model name or path to the saved backbone checkpoint. "
"By default, we use the `base_model_name_or_path` in adapter_config.json."
),
),
cache_dir: Optional[str] = typer.Option(
None, "--cache-dir", help="Directory for downloading checkpoint."
),
dry_run: bool = typer.Option(
False, "--dry-run", help="Only check conversion avaliability."
),
output_adapter_filename: str = typer.Option(
"adapter.h5",
"--output-adapter-filename",
help="Name of the converted adapter checkpoint file.",
),
output_attr_filename: str = typer.Option(
"adapter_attr.yaml",
"--output-attr-filename",
help="Name of the adapter checkpoint attribute file.",
),
) -> None:
"""Convert huggingface's adapter checkpoint to Friendli format.
When an adapter checkpoint is in the Hugging Face PEFT format, it cannot
be directly served in Friendli. It requires conversion to the Friendli format.
The conversion process involves copying the original adapter checkpoint and
transforming it into a checkpoint in the Friendli format (*.h5).
This function does not include the `friendli model convert` command. i.e.
`friendli model convert-adapter` only converts adapter's parameters, not backbone's.
:::caution
`friendli model convert-adapter` is available only when the package is installed with
`pip install "friendli-client[mllib]"`.
:::
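For reference, a minimal sketch of the call this command makes under the hood,
assuming the package is installed with the `mllib` extra; the adapter name and
paths below are illustrative:

```python
# A hedged sketch of a direct adapter conversion call; it mirrors the CLI's own
# invocation of convert_adapter_checkpoint. Names and paths are hypothetical.
from friendli.enums import CheckpointFileType
from friendli.modules.converter.convert import convert_adapter_checkpoint

convert_adapter_checkpoint(
    adapter_name_or_path="my-org/my-lora-adapter",  # hypothetical adapter
    output_attr_filename="adapter_attr.yaml",
    output_dir="./converted-adapter",
    output_adapter_filename="adapter.h5",
    base_model_name_or_path=None,  # defaults to the value in adapter_config.json
    data_type=None,
    output_adapter_file_type=CheckpointFileType.HDF5,  # Safetensors LoRA not supported yet
    cache_dir=None,
    dry_run=True,  # only check that conversion is possible
)
```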
"""
try:
from friendli.modules.converter.convert import ( # pylint: disable=import-outside-toplevel
convert_adapter_checkpoint,
)
except ModuleNotFoundError as exc:
secho_error_and_exit(str(exc))

if not os.path.isdir(output_dir):
if os.path.exists(output_dir):
secho_error_and_exit(f"'{output_dir}' exists, but it is not a directory.")
os.mkdir(output_dir)

# The engine cannot load a Safetensors LoRA checkpoint yet.
output_adapter_file_type = CheckpointFileType.HDF5
default_names = {
CheckpointFileType.HDF5: "adapter.h5",
CheckpointFileType.SAFETENSORS: "adapter.safetensors",
}
output_adapter_filename = (
output_adapter_filename or default_names[output_adapter_file_type]
)

try:
convert_adapter_checkpoint(
adapter_name_or_path=adapter_name_or_path,
output_attr_filename=output_attr_filename,
output_dir=output_dir,
output_adapter_filename=output_adapter_filename,
base_model_name_or_path=base_model_name_or_path,
data_type=data_type,
output_adapter_file_type=output_adapter_file_type,
cache_dir=cache_dir,
dry_run=dry_run,
)
except (NotFoundError, CheckpointConversionError, InvalidConfigError) as exc:
secho_error_and_exit(str(exc))

msg = (
f"Checkpoint({adapter_name_or_path}) can be converted."
if dry_run
else f"Checkpoint({adapter_name_or_path}) has been converted successfully."
)
typer.secho(msg)
3 changes: 0 additions & 3 deletions friendli/modules/__init__.py

This file was deleted.

3 changes: 0 additions & 3 deletions friendli/modules/converter/__init__.py

This file was deleted.
