Integrate web UI with chat template #205

Open: wants to merge 48 commits into base: main

Commits (48)
94df92c
integrate inference chat template
minmingzhu Apr 19, 2024
f847569
update
minmingzhu Apr 22, 2024
0df70f1
update
minmingzhu Apr 22, 2024
6534808
update
minmingzhu Apr 22, 2024
5a864dc
update
minmingzhu Apr 22, 2024
e06105e
update
minmingzhu Apr 22, 2024
9a11e52
Update query_http_requests.py
minmingzhu Apr 22, 2024
02ee02d
update
minmingzhu Apr 26, 2024
5d11e45
update
minmingzhu Apr 26, 2024
62ab1bf
update
minmingzhu Apr 27, 2024
cc356f6
update
minmingzhu Apr 28, 2024
11718e8
update
minmingzhu Apr 29, 2024
d254f26
update yaml file
minmingzhu Apr 29, 2024
94f061a
update yaml
minmingzhu Apr 29, 2024
06c6579
format yaml
minmingzhu Apr 29, 2024
c5766a1
update
minmingzhu Apr 29, 2024
dad4224
Update mpt_deltatuner.yaml
minmingzhu Apr 29, 2024
f28f4cd
Update neural-chat-7b-v3-1.yaml
minmingzhu Apr 29, 2024
eec2124
update
minmingzhu Apr 30, 2024
f94e8bb
Merge branch 'inference_chat_template' of https://github.com/minmingz…
minmingzhu Apr 30, 2024
419aea3
Update predictor_deployment.py
minmingzhu May 6, 2024
dc6bb3b
implement fine-tuning chat template function
minmingzhu Apr 1, 2024
22b0ae5
update
minmingzhu Apr 2, 2024
1768e2a
update
minmingzhu Apr 8, 2024
2f256e5
update
minmingzhu Apr 8, 2024
0e5aca8
integrate gbt for transformer 4.26.0
minmingzhu Apr 8, 2024
df9e84e
update
minmingzhu Apr 8, 2024
0a60379
update
minmingzhu Apr 8, 2024
b242993
1. remove is_base_model tag
minmingzhu Apr 9, 2024
5afd158
update
minmingzhu Apr 10, 2024
bbf7925
1. update doc/finetune_parameters.md
minmingzhu Apr 10, 2024
c026adf
update
minmingzhu Apr 10, 2024
d51880b
Support latest Ray 2.10 release (#158)
xwu99 Apr 9, 2024
63d2ef8
[Tests] Add query single test (#156)
yutianchen666 Apr 9, 2024
05d63ef
format
minmingzhu Apr 10, 2024
3f0b7bc
[Finetune] use base model mpt-7b instead of mpt-7b-chat (#181)
minmingzhu Apr 10, 2024
42ecf63
fix license issues
minmingzhu Apr 22, 2024
85520e9
Update finetune.yaml
minmingzhu Apr 22, 2024
968e616
integrate inference chat template
minmingzhu Apr 19, 2024
43c333f
update
minmingzhu Apr 22, 2024
b5b7f28
update
minmingzhu Apr 22, 2024
9500d96
update
minmingzhu Apr 22, 2024
0c41b8b
update
minmingzhu Apr 22, 2024
0ff3d0b
Integrate Web UI
minmingzhu Apr 24, 2024
0ec9205
update
minmingzhu Apr 26, 2024
a328494
update
minmingzhu Apr 26, 2024
a8e7b38
update
minmingzhu Apr 29, 2024
cbae213
update
minmingzhu May 6, 2024
Files changed
6 changes: 0 additions & 6 deletions .github/workflows/config/bloom-560m-ci.yaml
@@ -13,9 +13,3 @@ ipex:
model_description:
model_id_or_path: bigscience/bloom-560m
tokenizer_name_or_path: bigscience/bloom-560m
chat_processor: ChatModelGptJ
prompt:
intro: ''
human_id: ''
bot_id: ''
stop_words: []
7 changes: 1 addition & 6 deletions .github/workflows/config/gpt2-ci.yaml
@@ -12,10 +12,5 @@ ipex:
model_description:
model_id_or_path: gpt2
tokenizer_name_or_path: gpt2
chat_processor: ChatModelGptJ
gpt_base_model: true
prompt:
intro: ''
human_id: ''
bot_id: ''
stop_words: []
chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ message['content'].strip() }}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() }}{% endif %}{% endfor %}"
8 changes: 0 additions & 8 deletions .github/workflows/config/llama-2-7b-chat-hf-vllm-fp32.yaml
@@ -16,13 +16,5 @@ ipex:
model_description:
model_id_or_path: meta-llama/Llama-2-7b-chat-hf
tokenizer_name_or_path: meta-llama/Llama-2-7b-chat-hf
chat_processor: ChatModelLLama
prompt:
intro: ''
human_id: '[INST] {msg} [/INST]

'
bot_id: ''
stop_words: []
config:
use_auth_token: ''
13 changes: 0 additions & 13 deletions .github/workflows/config/mpt_deltatuner.yaml
@@ -13,20 +13,7 @@ ipex:
model_description:
model_id_or_path: mosaicml/mpt-7b
tokenizer_name_or_path: EleutherAI/gpt-neox-20b
chat_processor: ChatModelGptJ
peft_model_id_or_path: nathan0/mpt-7b-deltatuner-model
peft_type: deltatuner
prompt:
intro: 'Below is an instruction that describes a task, paired with an input that
provides further context. Write a response that appropriately completes the request.

'
human_id: '

### Instruction'
bot_id: '

### Response'
stop_words: []
config:
trust_remote_code: true
13 changes: 0 additions & 13 deletions .github/workflows/config/mpt_deltatuner_deepspeed.yaml
@@ -13,20 +13,7 @@ ipex:
model_description:
model_id_or_path: mosaicml/mpt-7b
tokenizer_name_or_path: EleutherAI/gpt-neox-20b
chat_processor: ChatModelGptJ
peft_model_id_or_path: nathan0/mpt-7b-deltatuner-model
peft_type: deltatuner
prompt:
intro: 'Below is an instruction that describes a task, paired with an input that
provides further context. Write a response that appropriately completes the request.

'
human_id: '

### Instruction'
bot_id: '

### Response'
stop_words: []
config:
trust_remote_code: true
7 changes: 1 addition & 6 deletions .github/workflows/config/opt-125m-ci.yaml
@@ -13,9 +13,4 @@ ipex:
model_description:
model_id_or_path: facebook/opt-125m
tokenizer_name_or_path: facebook/opt-125m
chat_processor: ChatModelGptJ
prompt:
intro: ''
human_id: ''
bot_id: ''
stop_words: []
chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ message['content'].strip() }}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() }}{% endif %}{% endfor %}"
1 change: 1 addition & 0 deletions docs/finetune_parameters.md
@@ -15,6 +15,7 @@ The following are the parameters supported in the finetuning workflow.
|lora_config|task_type: CAUSAL_LM<br>r: 8<br>lora_alpha: 32<br>lora_dropout: 0.1|Will be passed to the LoraConfig `__init__()` method, then it'll be used as config to build Peft model object.|
|deltatuner_config|"algo": "lora"<br>"denas": True<br>"best_model_structure": "/path/to/best_structure_of_deltatuner_model"|Will be passed to the DeltaTunerArguments `__init__()` method, then it'll be used as config to build [Deltatuner model](https://github.com/intel/e2eAIOK/tree/main/e2eAIOK/deltatuner) object.|
|enable_gradient_checkpointing|False|enable gradient checkpointing to save GPU memory, but will cost more compute runtime|
|chat_template|None|User-defined chat template.|
Reviewer comment (Contributor): Add a description and a link to the Hugging Face documentation; otherwise users will not know what this parameter is.
## Dataset Parameters
@@ -58,7 +58,6 @@
body = {
"model": args.model_name,
"messages": [
{"role": "assistant", "content": "You are a helpful assistant."},
{"role": "user", "content": args.input_text},
],
"stream": args.streaming_response,
@@ -73,15 +73,14 @@

messages = [
[
{"role": "user", "content": "You are a helpful assistant"},
{"role": "user", "content": "What's the weather like in Boston today?"},
],
]

proxies = {"http": None, "https": None}

for message in messages:
print(f"User: {message[1]['content']}")
print(f"User: {message[0]['content']}")
print("Assistant:", end=" ", flush=True)

body = {
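For clarity, the request these example scripts build is an OpenAI-style chat completions call. A self-contained sketch is below; the endpoint URL, port, and any field beyond model/messages/stream are assumptions for illustration, not taken from this diff.

```python
import json

import requests

# Hypothetical endpoint; the real URL comes from the deployed serve application.
url = "http://localhost:8000/v1/chat/completions"

body = {
    "model": "gpt2",  # illustrative model name
    "messages": [
        {"role": "user", "content": "What's the weather like in Boston today?"},
    ],
    "stream": False,
}

# Bypass any local proxy settings, as the example scripts do.
proxies = {"http": None, "https": None}

response = requests.post(url, json=body, proxies=proxies, timeout=60)
response.raise_for_status()
print(json.dumps(response.json(), indent=2))
```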
7 changes: 6 additions & 1 deletion examples/inference/api_server_simple/query_single.py
@@ -55,7 +55,12 @@
)

args = parser.parse_args()
prompt = "Once upon a time,"
# prompt = "Once upon a time,"
prompt = [
{"role": "user", "content": "Which is bigger, the moon or the sun?"},
Reviewer comment (Contributor): Don't modify this; api_server_simple/query_single.py is for the simple protocol and is not formatted like this. Focus on the OpenAI-compatible API; there is no need to support the chat template for the simple protocol if it requires changing the query format.
]


config: Dict[str, Union[int, float]] = {}
if args.max_new_tokens:
config["max_new_tokens"] = int(args.max_new_tokens)
89 changes: 57 additions & 32 deletions llm_on_ray/common/dataprocesser/general_processer.py
@@ -99,10 +99,65 @@ def torch_call(self, examples):


class GeneralProcesser(DataProcesser):
def tokenize_function(self, examples, tokenizer):
if self.config.get("gpt_base_model"):
instruction = examples["instruction"]
response = examples["response"]
context = examples.get("context")
if not instruction:
raise ValueError(f"Expected an instruction in: {examples}")
if not response:
raise ValueError(f"Expected a response in: {examples}")
if context:
new_message = PROMPT_WITH_INPUT_FORMAT.format(
instruction=instruction, response=response, input=context
)
else:
new_message = PROMPT_NO_INPUT_FORMAT.format(
instruction=instruction, response=response
)
return tokenizer(
new_message, add_special_tokens=False, max_length=self.config.get("max_length")
)
else:
new_messages = [
{
"role": "user",
"content": "###Instruction:\n"
+ examples["instruction"]
+ "\n\n"
+ "###context:\n"
+ examples["context"]
+ "\n\n",
},
{"role": "assistant", "content": examples["response"] + "\n\n"},
]
if self.config.get("chat_template") is not None:
tokenizer.chat_template = self.config.get("chat_template")
new_tokenizer = tokenizer.apply_chat_template(
new_messages,
tokenize=False,
)
elif tokenizer.chat_template is not None:
new_tokenizer = tokenizer.apply_chat_template(
new_messages,
tokenize=False,
)
else:
tokenizer.chat_template = self.config.get("default_chat_template")
new_tokenizer = tokenizer.apply_chat_template(
new_messages,
tokenize=False,
)
tokenizer = tokenizer(
new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length")
)
return tokenizer

def prepare(self, tokenizer, dataset):
per_device_train_batch_size = self.config.get("per_device_train_batch_size")
per_device_eval_batch_size = self.config.get("per_device_eval_batch_size")
max_length = self.config.get("max_length")

group = self.config.get("group")
block_size = self.config.get("block_size")
shuffle = self.config.get("shuffle")
@@ -114,38 +169,8 @@ def prepare(self, tokenizer, dataset):
if isinstance(dataset, datasets.DatasetDict):
column_names = dataset["train"].column_names

if column_names and TEXT_COLUMN_NAME not in column_names:

def prompt(rec):
instruction = rec["instruction"]
response = rec["response"]
context = rec.get("context")
if not instruction:
raise ValueError(f"Expected an instruction in: {rec}")
if not response:
raise ValueError(f"Expected a response in: {rec}")
if context:
rec["text"] = PROMPT_WITH_INPUT_FORMAT.format(
instruction=instruction, response=response, input=context
)
else:
rec["text"] = PROMPT_NO_INPUT_FORMAT.format(
instruction=instruction, response=response
)
return rec

dataset = dataset.map(
prompt,
load_from_cache_file=False,
desc="Prompt",
)
column_names += [TEXT_COLUMN_NAME]

def tokenize_function(examples):
return tokenizer(examples[TEXT_COLUMN_NAME], max_length=max_length)

tokenized_datasets = dataset.map(
tokenize_function,
lambda examples: self.tokenize_function(examples, tokenizer),
remove_columns=column_names,
load_from_cache_file=False,
desc="Tokenize dataset",
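To summarize the control flow added in `tokenize_function` above: GPT base models keep the old instruction/response prompt format, while other models build a user/assistant message pair and render it with, in order of precedence, the configured `chat_template`, the tokenizer's own template, or the `default_chat_template` from the finetune config. A simplified sketch of that non-GPT path follows; the helper name is mine, not the PR's.

```python
from typing import Any, Dict


def render_and_tokenize(example: Dict[str, str], tokenizer, config: Dict[str, Any]):
    """Sketch of the chat-template branch of GeneralProcesser.tokenize_function."""
    messages = [
        {
            "role": "user",
            "content": "###Instruction:\n" + example["instruction"] + "\n\n"
            "###context:\n" + example["context"] + "\n\n",
        },
        {"role": "assistant", "content": example["response"] + "\n\n"},
    ]

    # Precedence: user-supplied template > tokenizer's built-in template > default.
    if config.get("chat_template") is not None:
        tokenizer.chat_template = config["chat_template"]
    elif tokenizer.chat_template is None:
        tokenizer.chat_template = config["default_chat_template"]

    text = tokenizer.apply_chat_template(messages, tokenize=False)
    return tokenizer(text, add_special_tokens=False, max_length=config.get("max_length"))
```

Collapsing the three branches this way also avoids repeating the `apply_chat_template` call and keeps the rendered string and the tokenized output in separately named variables.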
8 changes: 7 additions & 1 deletion llm_on_ray/common/trainer/default_trainer.py
@@ -33,6 +33,7 @@
class DefaultTrainer(Trainer):
def __init__(self, config):
self.model = None
self.tokenizer = None
self.config = config
dataprocesser_config = config.get("dataprocesser")
dataprocesser_type = dataprocesser_config.get("type")
@@ -121,7 +122,7 @@ def _get_lr_scheduler(

def prepare(self, model, tokenizer, dataset, optimizer, accelerator):
self._coordinate(accelerator)

self.tokenizer = tokenizer
embedding_size = model.get_input_embeddings().weight.shape[0]
logger.info(f"model embedding size: {embedding_size}")
if len(tokenizer) > embedding_size:
@@ -288,6 +289,11 @@ def train(self):
is_main_process=self.accelerator.is_main_process,
save_function=self.accelerator.save,
)
self.tokenizer.save_pretrained(
output,
is_main_process=self.accelerator.is_main_process,
save_function=self.accelerator.save,
)
logger.info(f"finish save model to {output}")

self.accelerator.wait_for_everyone()
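The new `self.tokenizer.save_pretrained` call matters because the checkpoint directory then contains everything needed to reload the fine-tuned model, including any chat template attached to the tokenizer during training. A minimal reload sketch, with a placeholder output path:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

output_dir = "/tmp/llm-on-ray/output"  # placeholder; use whatever `output` was above

model = AutoModelForCausalLM.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# The chat template travels with tokenizer_config.json, so inference code can
# reuse it directly via apply_chat_template.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}], tokenize=False
)
```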
5 changes: 4 additions & 1 deletion llm_on_ray/finetune/finetune.py
@@ -14,7 +14,7 @@
# limitations under the License.
#

#!/usr/bin/env python
# !/usr/bin/env python

import os
import argparse
@@ -248,6 +248,9 @@ def train_func(config: Dict[str, Any]):
"group": config["Dataset"].get("group", True),
"block_size": config["Dataset"].get("block_size", 512),
"shuffle": config["Dataset"].get("shuffle", False),
"gpt_base_model": config["General"].get("gpt_base_model", False),
"chat_template": config["General"]["chat_template"],
"default_chat_template": config["General"]["default_chat_template"],
},
"lr_scheduler": {
"enable": True,
37 changes: 36 additions & 1 deletion llm_on_ray/finetune/finetune_config.py
@@ -13,10 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os

from pydantic import BaseModel, validator
from typing import Optional, List
from typing import Optional, List, Dict

from pydantic_yaml import parse_yaml_raw_as

PRECISION_BF16 = "bf16"
PRECISION_FP16 = "fp16"
@@ -60,6 +62,23 @@ class General(BaseModel):
lora_config: Optional[LoraConfig] = None
deltatuner_config: Optional[DeltatunerConfig] = None
enable_gradient_checkpointing: bool = False
chat_template: Optional[str] = None
default_chat_template: str = (
"Below is an instruction that describes a task. Write a response that appropriately completes the request."
"{% if messages[0]['role'] == 'system' %}"
"{{ raise_exception('System role not supported') }}"
"{% endif %}"
"{% for message in messages %}"
"{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
"{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
"{% endif %}"
"{% if message['role'] == 'user' %}"
"{{ '### Instruction: ' + message['content'] + eos_token }}"
"{% elif message['role'] == 'assistant' %}"
"{{ '### Response:' + message['content'] + eos_token }}"
"{% endif %}{% endfor %}"
"{{'### End \n'}}"
)


class Dataset(BaseModel):
@@ -146,3 +165,19 @@ class FinetuneConfig(BaseModel):
General: General
Dataset: Dataset
Training: Training


base_models: Dict[str, FinetuneConfig] = {}
_models: Dict[str, FinetuneConfig] = {}

_cur = os.path.dirname(os.path.abspath(__file__))
_models_folder = _cur + "/models"
for model_file in os.listdir(_models_folder):
file_path = _models_folder + "/" + model_file
if os.path.isdir(file_path):
continue
with open(file_path, "r") as f:
m: FinetuneConfig = parse_yaml_raw_as(FinetuneConfig, f)
_models[m.General.base_model] = m

all_models = _models.copy()
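To make the `default_chat_template` added above concrete, the sketch below renders it for one instruction/response pair. The GPT-2 tokenizer is used only so that `eos_token` resolves to a real value; the expected output is shown in a comment and is approximate.

```python
from transformers import AutoTokenizer

# The default_chat_template string added to General above, reproduced verbatim.
DEFAULT_CHAT_TEMPLATE = (
    "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    "{% if messages[0]['role'] == 'system' %}"
    "{{ raise_exception('System role not supported') }}"
    "{% endif %}"
    "{% for message in messages %}"
    "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
    "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
    "{% endif %}"
    "{% if message['role'] == 'user' %}"
    "{{ '### Instruction: ' + message['content'] + eos_token }}"
    "{% elif message['role'] == 'assistant' %}"
    "{{ '### Response:' + message['content'] + eos_token }}"
    "{% endif %}{% endfor %}"
    "{{'### End \n'}}"
)

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # eos_token is <|endoftext|>
tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE

rendered = tokenizer.apply_chat_template(
    [
        {"role": "user", "content": "Name a prime number."},
        {"role": "assistant", "content": "Seven."},
    ],
    tokenize=False,
)
print(rendered)
# Roughly:
# Below is an instruction that describes a task. Write a response that appropriately
# completes the request.### Instruction: Name a prime number.<|endoftext|>### Response:Seven.<|endoftext|>### End
```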