From 9a077ebff255403084331df4f461541ac75b1a4b Mon Sep 17 00:00:00 2001
From: "Wu, Gangsheng"
Date: Fri, 5 Jan 2024 13:06:59 +0000
Subject: [PATCH] add mistralai/Mistral-7B-v0.1 to finetune workflow

---
 .github/workflows/workflow_finetune.yml | 7 ++++++-
 finetune/finetune.py                    | 2 +-
 finetune/finetune.yaml                  | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml
index 0cf51cd18..38974607f 100644
--- a/.github/workflows/workflow_finetune.yml
+++ b/.github/workflows/workflow_finetune.yml
@@ -34,7 +34,7 @@ jobs:
     name: finetune test
     strategy:
       matrix:
-        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b ]
+        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b, mistralai/Mistral-7B-v0.1 ]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
 
@@ -43,6 +43,7 @@
         include:
           - { model: "EleutherAI/gpt-j-6b"}
           - { model: "meta-llama/Llama-2-7b-chat-hf"}
+          - { model: "mistralai/Mistral-7B-v0.1"}
 
     runs-on: self-hosted
 
@@ -96,6 +97,10 @@ jobs:
             result['General']["gpt_base_model"] = True
           else:
             result['General']["gpt_base_model"] = False
+          if "${{ matrix.model }}" == "mistralai/Mistral-7B-v0.1":
+            result['General']['lora_config']['target_modules'] = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head",]
+          else:
+            result['General']['lora_config']['target_modules'] = None
           if "${{ matrix.model }}" == "meta-llama/Llama-2-7b-chat-hf":
             result['General']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
           else:
diff --git a/finetune/finetune.py b/finetune/finetune.py
index c752de085..26a175d3c 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -111,7 +111,7 @@ def train_func(config: Dict[str, Any]):
     trainer = common.trainer.Trainer.registory.get("DefaultTrainer")(config = {
         "num_train_epochs": config["Training"]["epochs"],
         "max_train_step": config["Training"].get("max_train_steps", None),
-        "log_step": 1,
+        "log_step": config["General"].get("log_step", 10),
         "output": config["General"]["output_dir"],
         "dataprocesser": {
             "type": "GeneralProcesser",
diff --git a/finetune/finetune.yaml b/finetune/finetune.yaml
index f0092022d..b0cb6ef5b 100644
--- a/finetune/finetune.yaml
+++ b/finetune/finetune.yaml
@@ -1,6 +1,7 @@
 General:
   base_model: EleutherAI/gpt-j-6b
   gpt_base_model: true
+  log_step: 10
   output_dir: /tmp/llm-ray/output
   checkpoint_dir: /tmp/llm-ray/checkpoint
   config:
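
Note for reviewers: the target_modules list set for Mistral above names that
architecture's attention projections (q_proj, k_proj, v_proj, o_proj), MLP
projections (gate_proj, up_proj, down_proj), and lm_head. As a rough sanity
check of what this setting amounts to, here is a minimal sketch applying the
same module list via HuggingFace PEFT; the r and lora_alpha values are
illustrative placeholders, not values taken from this patch, and the actual
wiring lives in the finetune code rather than in this workflow step:

    # Sketch only: demonstrates what targeting these modules means in PEFT terms.
    # r and lora_alpha are placeholder values, not taken from this patch.
    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM

    base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
    lora = LoraConfig(
        task_type="CAUSAL_LM",
        r=8,
        lora_alpha=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj", "lm_head"],
    )
    model = get_peft_model(base, lora)
    model.print_trainable_parameters()  # only the injected LoRA weights train

For the other models in the matrix, target_modules is reset to None so that
PEFT falls back to its per-architecture defaults.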