From 918290775735c21b1c35398e141becbffa4f247f Mon Sep 17 00:00:00 2001 From: minmingzhu <45281494+minmingzhu@users.noreply.github.com> Date: Wed, 10 Apr 2024 11:22:03 +0000 Subject: [PATCH] [Finetune] use base model mpt-7b instead of mpt-7b-chat (#181) * use base model mpt-7b instead of mpt-7b-chat Signed-off-by: minmingzhu * manual setting specify tokenizer Signed-off-by: minmingzhu * update Signed-off-by: minmingzhu * update doc/finetune_parameters.md Signed-off-by: minmingzhu --------- Signed-off-by: minmingzhu --- .github/workflows/night_build_memo.txt | 2 +- .github/workflows/workflow_finetune.yml | 6 +++--- docs/finetune_parameters.md | 1 + llm_on_ray/finetune/finetune.py | 6 +++++- llm_on_ray/finetune/finetune_config.py | 1 + .../finetune/models/{mpt-7b-chat.yaml => mpt-7b.yaml} | 3 ++- 6 files changed, 13 insertions(+), 6 deletions(-) rename llm_on_ray/finetune/models/{mpt-7b-chat.yaml => mpt-7b.yaml} (91%) diff --git a/.github/workflows/night_build_memo.txt b/.github/workflows/night_build_memo.txt index e5197571c..520e176e1 100644 --- a/.github/workflows/night_build_memo.txt +++ b/.github/workflows/night_build_memo.txt @@ -1 +1 @@ -finetune: gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b \ No newline at end of file +finetune: gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b, huggyllama/llama-7b \ No newline at end of file diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml index 76f1097a4..ddc547774 100644 --- a/.github/workflows/workflow_finetune.yml +++ b/.github/workflows/workflow_finetune.yml @@ -34,7 +34,7 @@ jobs: name: finetune strategy: matrix: - model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, meta-llama/Llama-2-7b-hf, mistralai/Mistral-7B-v0.1, google/gemma-2b] + model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, 
facebook/opt-125m, mosaicml/mpt-7b, meta-llama/Llama-2-7b-hf, mistralai/Mistral-7B-v0.1, google/gemma-2b] isPR: - ${{inputs.ci_type == 'pr'}} @@ -92,7 +92,7 @@ jobs: with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['General']['base_model'] = "${{ matrix.model }}" - if "${{ matrix.model }}" == "mosaicml/mpt-7b-chat": + if "${{ matrix.model }}" == "mosaicml/mpt-7b": result['General']['config']['trust_remote_code'] = True else: result['General']['config']['trust_remote_code'] = False @@ -147,7 +147,7 @@ jobs: - name: Run Deltatuner Test on DENAS-LoRA Model run: | - if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1|google\/gemma-2b)$ ]]; then + if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1|google\/gemma-2b)$ ]]; then echo ${{ matrix.model }} is not supported! else docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*" diff --git a/docs/finetune_parameters.md b/docs/finetune_parameters.md index 531549adf..5d24f42e6 100644 --- a/docs/finetune_parameters.md +++ b/docs/finetune_parameters.md @@ -7,6 +7,7 @@ The following are the parameters supported in the finetuning workflow. |Configuration Name| Default|Meaning| |-|-|-| |base_model| EleutherAI/gpt-j-6b|Path to pretrained model or model identifier from huggingface.co/models| +|tokenizer_name|None|Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models. If not provided, the tokenizer will be loaded from the `base_model`.| |gpt_base_model|True|This parameter is for [Transformers#22482](https://github.com/huggingface/transformers/issues/22482). 
It needs to be set to True when the pretrained model is realted to gpt, otherwise it is False.| |output_dir|/tmp/llm-ray/output|The output directory to store the finetuned model| |checkpoint_dir|/tmp/llm-ray/checkpoint|The directory to store checkpoint| diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index b31a5f01d..0f9e96f96 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -155,6 +155,10 @@ def train_func(config: Dict[str, Any]): gradient_accumulation_steps = config["Training"].get("gradient_accumulation_steps", 1) base_model = config["General"]["base_model"] + if config["General"].get("tokenizer_name") is not None: + tokenizer_name = config["General"].get("tokenizer_name") + else: + tokenizer_name = base_model dataset_file = config["Dataset"]["train_file"] seed = config["Training"].get("seed") @@ -171,7 +175,7 @@ def train_func(config: Dict[str, Any]): tokenizer = common.tokenizer.Tokenizer.registory.get("HuggingFaceTokenizer")()( config={ - "name": base_model, + "name": tokenizer_name, "config": config["General"]["config"], } ) diff --git a/llm_on_ray/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py index 8f5f6ed6f..a01095c16 100644 --- a/llm_on_ray/finetune/finetune_config.py +++ b/llm_on_ray/finetune/finetune_config.py @@ -52,6 +52,7 @@ class DeltatunerConfig(BaseModel): class General(BaseModel): base_model: str + tokenizer_name: Optional[str] = None gpt_base_model: bool output_dir: str checkpoint_dir: Optional[str] diff --git a/llm_on_ray/finetune/models/mpt-7b-chat.yaml b/llm_on_ray/finetune/models/mpt-7b.yaml similarity index 91% rename from llm_on_ray/finetune/models/mpt-7b-chat.yaml rename to llm_on_ray/finetune/models/mpt-7b.yaml index b4644194f..067a093a2 100644 --- a/llm_on_ray/finetune/models/mpt-7b-chat.yaml +++ b/llm_on_ray/finetune/models/mpt-7b.yaml @@ -1,5 +1,6 @@ General: - base_model: mosaicml/mpt-7b-chat + base_model: mosaicml/mpt-7b + tokenizer_name: 
EleutherAI/gpt-neox-20b gpt_base_model: false output_dir: /tmp/llm-ray/output checkpoint_dir: /tmp/llm-ray/checkpoint