diff --git a/src/llmtuner/__init__.py b/src/llmtuner/__init__.py index fde809590a..895a2c4897 100644 --- a/src/llmtuner/__init__.py +++ b/src/llmtuner/__init__.py @@ -7,4 +7,4 @@ from llmtuner.webui import create_ui, create_web_demo -__version__ = "0.2.1" +__version__ = "0.2.2" diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 6627e95d07..95916b69fa 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -150,6 +150,14 @@ def register_model_group( ) +register_model_group( + models={ + "LingoWhale-8B": "deeplang-ai/LingoWhale-8B" + }, + module="qkv_proj" +) + + register_model_group( models={ "LLaMA-7B": "huggyllama/llama-7b", diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index ab9e9ffc10..18a8c475d4 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -136,7 +136,7 @@ def _parse_train_args(self, data: Dict[Component, Any]) -> Dict[str, Any]: args["upcast_layernorm"] = True if args["stage"] == "ppo": - args["reward_model"] = get("train.reward_model") + args["reward_model"] = get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.reward_model")) if args["stage"] == "dpo": args["dpo_beta"] = get("train.dpo_beta")