From 90819066b498da3c807357627a1bfcf5d40306d0 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 1 Apr 2024 02:29:30 +0000 Subject: [PATCH 01/24] implement fine-tuning chat template function Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 91 +++++-------------- llm_on_ray/finetune/finetune.py | 3 + llm_on_ray/finetune/finetune.yaml | 2 + llm_on_ray/finetune/finetune_config.py | 9 +- 4 files changed, 34 insertions(+), 71 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 37235b425..7c95b92ab 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -23,53 +23,9 @@ from llm_on_ray.common.dataprocesser import DataProcesser -INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request." -INSTRUCTION_KEY = "### Instruction:" -INPUT_KEY = "Input:" RESPONSE_KEY = "### Response:" -END_KEY = "### End" RESPONSE_KEY_NL = f"{RESPONSE_KEY}\n" -PROMPT_NO_INPUT_FORMAT = """{intro} - -{instruction_key} -{instruction} - -{response_key} -{response} - -{end_key}""".format( - intro=INTRO_BLURB, - instruction_key=INSTRUCTION_KEY, - instruction="{instruction}", - response_key=RESPONSE_KEY, - response="{response}", - end_key=END_KEY, -) - -PROMPT_WITH_INPUT_FORMAT = """{intro} - -{instruction_key} -{instruction} - -{input_key} -{input} - -{response_key} -{response} - -{end_key}""".format( - intro=INTRO_BLURB, - instruction_key=INSTRUCTION_KEY, - instruction="{instruction}", - input_key=INPUT_KEY, - input="{input}", - response_key=RESPONSE_KEY, - response="{response}", - end_key=END_KEY, -) -TEXT_COLUMN_NAME = "text" - class DataCollatorForCompletionOnlyLM(transformers.DataCollatorForLanguageModeling): def torch_call(self, examples): @@ -101,6 +57,7 @@ def torch_call(self, examples): class GeneralProcesser(DataProcesser): def tokenize_dataset(self, tokenizer, dataset): max_length = self.config.get("max_length") + custom_chat_template = self.config.get("custom_chat_template") group = self.config.get("group") block_size = self.config.get("block_size") tokenizer.pad_token = tokenizer.eos_token @@ -111,35 +68,29 @@ def tokenize_dataset(self, tokenizer, dataset): if isinstance(dataset, datasets.DatasetDict): column_names = dataset["train"].column_names - if column_names and TEXT_COLUMN_NAME not in column_names: - - def prompt(rec): - instruction = rec["instruction"] - response = rec["response"] - context = rec.get("context") - if not instruction: - raise ValueError(f"Expected an instruction in: {rec}") - if not response: - raise ValueError(f"Expected a response in: {rec}") - if context: - rec["text"] = PROMPT_WITH_INPUT_FORMAT.format( - instruction=instruction, response=response, input=context + def tokenize_function(examples): + if self.config.get("is_base_model"): + if custom_chat_template: + new_tokenizer = tokenizer.apply_chat_template( + examples, + chat_template=custom_chat_template, + tokenize=True, + max_length=max_length, ) else: - rec["text"] = PROMPT_NO_INPUT_FORMAT.format( - instruction=instruction, response=response + new_tokenizer = tokenizer.apply_chat_template( + examples, + chat_template=self.config.get("default_chat_template"), + tokenize=True, + max_length=max_length, ) - return rec - - dataset = dataset.map( - prompt, - load_from_cache_file=False, - desc="Prompt", - ) - column_names += [TEXT_COLUMN_NAME] - - def tokenize_function(examples): - return 
tokenizer(examples[TEXT_COLUMN_NAME], max_length=max_length) + else: + new_tokenizer = tokenizer.apply_chat_template( + examples, tokenize=False, max_length=max_length + ) + print(new_tokenizer) + print(new_tokenizer.default_chat_template) + return new_tokenizer tokenized_datasets = dataset.map( tokenize_function, diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index 29d955a49..444b7649d 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -230,6 +230,9 @@ def train_func(config: Dict[str, Any]): "group": config["Dataset"].get("group", True), "block_size": config["Dataset"].get("block_size", 512), "shuffle": config["Dataset"].get("shuffle", False), + "name": tokenizer_name, + "config": config["General"]["config"], + "custom_chat_template": config["General"]["custom_chat_template"], } ) tokenized_datasets = dataprocesser.tokenize_dataset(tokenizer, datasets) diff --git a/llm_on_ray/finetune/finetune.yaml b/llm_on_ray/finetune/finetune.yaml index 627a88753..3baf110c3 100644 --- a/llm_on_ray/finetune/finetune.yaml +++ b/llm_on_ray/finetune/finetune.yaml @@ -1,5 +1,6 @@ General: base_model: EleutherAI/gpt-j-6b + is_base_model: false gpt_base_model: true output_dir: /tmp/llm-ray/output save_strategy: no @@ -12,6 +13,7 @@ General: lora_alpha: 32 lora_dropout: 0.1 enable_gradient_checkpointing: false + custom_chat_template: null Dataset: train_file: examples/data/sample_finetune_data_small.jsonl group: true diff --git a/llm_on_ray/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py index 030fcc5a6..bbc814d28 100644 --- a/llm_on_ray/finetune/finetune_config.py +++ b/llm_on_ray/finetune/finetune_config.py @@ -17,7 +17,6 @@ from pydantic import BaseModel, validator from typing import Optional, List - PRECISION_BF16 = "bf16" PRECISION_FP16 = "fp16" PRECISION_NO = "no" @@ -61,6 +60,14 @@ class General(BaseModel): lora_config: Optional[LoraConfig] = None deltatuner_config: Optional[DeltatunerConfig] = None enable_gradient_checkpointing: bool = False + custom_chat_template: Optional[str] = None + default_chat_template: str = ( + "{{'### Below is an instruction that describes a task. " + "Write a response that appropriately completes the request. 
\n'}}" + "{% for message in messages %}{{'### Instruction: ' + message['instruction'] " + "+ ' Input:' + message['context'] + ' ### Response:' + message['response'] " + "+ '### End \n'}}{% endfor %}" + ) class Dataset(BaseModel): From 7f7d404d805d686d9af7bfdbeb9153eae4be92ca Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Tue, 2 Apr 2024 08:25:34 +0000 Subject: [PATCH 02/24] update Signed-off-by: minmingzhu --- llm_on_ray/common/trainer/default_trainer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llm_on_ray/common/trainer/default_trainer.py b/llm_on_ray/common/trainer/default_trainer.py index 8825f08be..5509bc3a1 100644 --- a/llm_on_ray/common/trainer/default_trainer.py +++ b/llm_on_ray/common/trainer/default_trainer.py @@ -33,6 +33,7 @@ class DefaultTrainer(Trainer): def __init__(self, config): self.model = None + self.tokenizer = None self.config = config dataprocesser_config = config.get("dataprocesser") dataprocesser_type = dataprocesser_config.get("type") @@ -121,7 +122,7 @@ def _get_lr_scheduler( def prepare(self, model, tokenizer, dataset, optimizer, accelerator): self._coordinate(accelerator) - + self.tokenizer = tokenizer embedding_size = model.get_input_embeddings().weight.shape[0] logger.info(f"model embedding size: {embedding_size}") if len(tokenizer) > embedding_size: @@ -290,6 +291,11 @@ def train(self): is_main_process=self.accelerator.is_main_process, save_function=self.accelerator.save, ) + self.tokenizer.save_pretrained( + output, + is_main_process=self.accelerator.is_main_process, + save_function=self.accelerator.save, + ) logger.info(f"finish save model to {output}") self.accelerator.wait_for_everyone() From a3ce22feeca6ae7025307707a104022af66ef224 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 8 Apr 2024 05:41:44 +0000 Subject: [PATCH 03/24] update Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 7c95b92ab..bffebbd68 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -58,6 +58,8 @@ class GeneralProcesser(DataProcesser): def tokenize_dataset(self, tokenizer, dataset): max_length = self.config.get("max_length") custom_chat_template = self.config.get("custom_chat_template") + model_default_chat_template = self.config.get("model_default_chat_template") + group = self.config.get("group") block_size = self.config.get("block_size") tokenizer.pad_token = tokenizer.eos_token @@ -71,25 +73,44 @@ def tokenize_dataset(self, tokenizer, dataset): def tokenize_function(examples): if self.config.get("is_base_model"): if custom_chat_template: - new_tokenizer = tokenizer.apply_chat_template( + tokenizer.chat_template = custom_chat_template + new_tokenizer = tokenizer.apply_chat_template( examples, - chat_template=custom_chat_template, - tokenize=True, + tokenize=False, max_length=max_length, ) else: + tokenizer.chat_template = self.config.get("default_chat_template") new_tokenizer = tokenizer.apply_chat_template( examples, - chat_template=self.config.get("default_chat_template"), - tokenize=True, + tokenize=False, max_length=max_length, ) else: - new_tokenizer = tokenizer.apply_chat_template( - examples, tokenize=False, max_length=max_length - ) - print(new_tokenizer) - print(new_tokenizer.default_chat_template) + if model_default_chat_template: + 
tokenizer.chat_template = model_default_chat_template + new_tokenizer = tokenizer.apply_chat_template( + examples, + tokenize=False, + max_length=max_length, + ) + else: + new_messages = [ + { + "role": "user", + "content": "instruction: " + + examples["instruction"] + + " context: " + + examples["context"], + }, + {"role": "assistant", "content": "response: " + examples["response"]}, + ] + + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + tokenize=False, + max_length=max_length, + ) return new_tokenizer tokenized_datasets = dataset.map( From b10cda384d0f5387bb7e9c8078c698c29923d87f Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 8 Apr 2024 05:59:44 +0000 Subject: [PATCH 04/24] update Signed-off-by: minmingzhu --- llm_on_ray/finetune/finetune.py | 45 +++++++++++++++++++++++++- llm_on_ray/finetune/finetune_config.py | 11 ++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index 444b7649d..cc6018442 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -232,7 +232,6 @@ def train_func(config: Dict[str, Any]): "shuffle": config["Dataset"].get("shuffle", False), "name": tokenizer_name, "config": config["General"]["config"], - "custom_chat_template": config["General"]["custom_chat_template"], } ) tokenized_datasets = dataprocesser.tokenize_dataset(tokenizer, datasets) @@ -284,6 +283,50 @@ def train_func(config: Dict[str, Any]): tokenizer=tokenizer, data_collator=data_collator, ) + trainer = common.trainer.Trainer.registory.get("DefaultTrainer")( + config={ + "device": config["Training"]["device"], + "accelerate_mode": config["Training"]["accelerate_mode"], + "num_train_epochs": epochs, + "max_train_steps": config["Training"].get("max_train_steps", None), + "logging_steps": config["Training"].get("logging_steps", 1), + "output": output_dir, + "dataprocesser": { + "type": "GeneralProcesser", + "per_device_train_batch_size": config["Training"]["batch_size"], + "per_device_eval_batch_size": config["Training"]["batch_size"], + "preprocessing_num_workers": config["Dataset"].get("preprocessing_num_workers", 1), + "max_length": config["Dataset"].get("max_length", 512), + "group": config["Dataset"].get("group", True), + "block_size": config["Dataset"].get("block_size", 512), + "shuffle": config["Dataset"].get("shuffle", False), + "is_base_model": config["General"]["is_base_model"], + "custom_chat_template": config["General"]["custom_chat_template"], + "default_chat_template": config["General"]["default_chat_template"], + "model_default_chat_template": config["General"]["model_default_chat_template"], + }, + "lr_scheduler": { + "enable": True, + "max_train_steps": None, + "lr_scheduler_type": config["Training"]["lr_scheduler"], + "num_warmup_steps": 0, + "learning_rate": config["Training"]["learning_rate"], + "weight_decay": config["Training"]["weight_decay"], + }, + "checkpoint": { + "root_path": config["General"].get("checkpoint_dir", None), + }, + } + ) + + try: + common.logger.info("trainer prepare start") + model.training = True + trainer.prepare(model, tokenizer, datasets, optimizer, accelerator) + except Exception as e: + common.logger.critical(e, exc_info=True) + exit(1) + common.logger.info("trainer prepare finish") common.logger.info("train start") trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) diff --git a/llm_on_ray/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py index bbc814d28..e5cd5dcf7 100644 --- 
a/llm_on_ray/finetune/finetune_config.py +++ b/llm_on_ray/finetune/finetune_config.py @@ -62,12 +62,13 @@ class General(BaseModel): enable_gradient_checkpointing: bool = False custom_chat_template: Optional[str] = None default_chat_template: str = ( - "{{'### Below is an instruction that describes a task. " - "Write a response that appropriately completes the request. \n'}}" - "{% for message in messages %}{{'### Instruction: ' + message['instruction'] " - "+ ' Input:' + message['context'] + ' ### Response:' + message['response'] " - "+ '### End \n'}}{% endfor %}" + "{{'### Below is an instruction that describes a task." + "Write a response that appropriately completes the request. '}}" + "{{'### Instruction: ' + messages['instruction'] " + "+ ' Input:' + messages['context'] + ' ### Response:' + messages['response'] " + "+ '### End \n'}}" ) + model_default_chat_template: Optional[str] = None class Dataset(BaseModel): From 049304ab8abcfb04765ebc597580e454ccc50fa6 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 8 Apr 2024 06:18:40 +0000 Subject: [PATCH 05/24] integrate gbt for transformer 4.26.0 Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 61 +++++++++++++++++++ llm_on_ray/finetune/finetune.py | 1 + 2 files changed, 62 insertions(+) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index bffebbd68..9ac21f979 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -23,9 +23,53 @@ from llm_on_ray.common.dataprocesser import DataProcesser +INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request." +INSTRUCTION_KEY = "### Instruction:" +INPUT_KEY = "Input:" RESPONSE_KEY = "### Response:" +END_KEY = "### End" RESPONSE_KEY_NL = f"{RESPONSE_KEY}\n" +PROMPT_NO_INPUT_FORMAT = """{intro} + +{instruction_key} +{instruction} + +{response_key} +{response} + +{end_key}""".format( + intro=INTRO_BLURB, + instruction_key=INSTRUCTION_KEY, + instruction="{instruction}", + response_key=RESPONSE_KEY, + response="{response}", + end_key=END_KEY, +) + +PROMPT_WITH_INPUT_FORMAT = """{intro} + +{instruction_key} +{instruction} + +{input_key} +{input} + +{response_key} +{response} + +{end_key}""".format( + intro=INTRO_BLURB, + instruction_key=INSTRUCTION_KEY, + instruction="{instruction}", + input_key=INPUT_KEY, + input="{input}", + response_key=RESPONSE_KEY, + response="{response}", + end_key=END_KEY, +) +TEXT_COLUMN_NAME = "text" + class DataCollatorForCompletionOnlyLM(transformers.DataCollatorForLanguageModeling): def torch_call(self, examples): @@ -71,6 +115,23 @@ def tokenize_dataset(self, tokenizer, dataset): column_names = dataset["train"].column_names def tokenize_function(examples): + if self.config.get("gpt_base_model"): + instruction = examples["instruction"] + response = examples["response"] + context = examples.get("context") + if not instruction: + raise ValueError(f"Expected an instruction in: {examples}") + if not response: + raise ValueError(f"Expected a response in: {examples}") + if context: + examples["text"] = PROMPT_WITH_INPUT_FORMAT.format( + instruction=instruction, response=response, input=context + ) + else: + examples["text"] = PROMPT_NO_INPUT_FORMAT.format( + instruction=instruction, response=response + ) + return tokenizer(examples["text"], max_length=max_length, truncation=True) if self.config.get("is_base_model"): if custom_chat_template: 
tokenizer.chat_template = custom_chat_template diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index cc6018442..fdd079838 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -300,6 +300,7 @@ def train_func(config: Dict[str, Any]): "group": config["Dataset"].get("group", True), "block_size": config["Dataset"].get("block_size", 512), "shuffle": config["Dataset"].get("shuffle", False), + "gpt_base_model": config["General"].get("gpt_base_model", False), "is_base_model": config["General"]["is_base_model"], "custom_chat_template": config["General"]["custom_chat_template"], "default_chat_template": config["General"]["default_chat_template"], From 63a121749073c32a749181c0f26f0cec20f6f411 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 8 Apr 2024 06:40:23 +0000 Subject: [PATCH 06/24] update Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 85 ++++++++++--------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 9ac21f979..df539835b 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -124,55 +124,56 @@ def tokenize_function(examples): if not response: raise ValueError(f"Expected a response in: {examples}") if context: - examples["text"] = PROMPT_WITH_INPUT_FORMAT.format( + new_message = PROMPT_WITH_INPUT_FORMAT.format( instruction=instruction, response=response, input=context ) else: - examples["text"] = PROMPT_NO_INPUT_FORMAT.format( + new_message = PROMPT_NO_INPUT_FORMAT.format( instruction=instruction, response=response ) - return tokenizer(examples["text"], max_length=max_length, truncation=True) - if self.config.get("is_base_model"): - if custom_chat_template: - tokenizer.chat_template = custom_chat_template - new_tokenizer = tokenizer.apply_chat_template( - examples, - tokenize=False, - max_length=max_length, - ) - else: - tokenizer.chat_template = self.config.get("default_chat_template") - new_tokenizer = tokenizer.apply_chat_template( - examples, - tokenize=False, - max_length=max_length, - ) + return tokenizer.tokenize(new_message, max_length=max_length) else: - if model_default_chat_template: - tokenizer.chat_template = model_default_chat_template - new_tokenizer = tokenizer.apply_chat_template( - examples, - tokenize=False, - max_length=max_length, - ) + if self.config.get("is_base_model"): + if custom_chat_template: + tokenizer.chat_template = custom_chat_template + new_tokenizer = tokenizer.apply_chat_template( + examples, + tokenize=False, + max_length=max_length, + ) + else: + tokenizer.chat_template = self.config.get("default_chat_template") + new_tokenizer = tokenizer.apply_chat_template( + examples, + tokenize=False, + max_length=max_length, + ) else: - new_messages = [ - { - "role": "user", - "content": "instruction: " - + examples["instruction"] - + " context: " - + examples["context"], - }, - {"role": "assistant", "content": "response: " + examples["response"]}, - ] - - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - max_length=max_length, - ) - return new_tokenizer + if model_default_chat_template: + tokenizer.chat_template = model_default_chat_template + new_tokenizer = tokenizer.apply_chat_template( + examples, + tokenize=False, + max_length=max_length, + ) + else: + new_messages = [ + { + "role": "user", + "content": "instruction: " + + 
examples["instruction"] + + " context: " + + examples["context"], + }, + {"role": "assistant", "content": "response: " + examples["response"]}, + ] + + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + tokenize=False, + max_length=max_length, + ) + return new_tokenizer tokenized_datasets = dataset.map( tokenize_function, From 58c95847a5ad0f842d1c8800b417037b6394216a Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 8 Apr 2024 08:56:23 +0000 Subject: [PATCH 07/24] update Signed-off-by: minmingzhu --- llm_on_ray/common/dataprocesser/general_processer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index df539835b..f752dc9ac 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -131,7 +131,7 @@ def tokenize_function(examples): new_message = PROMPT_NO_INPUT_FORMAT.format( instruction=instruction, response=response ) - return tokenizer.tokenize(new_message, max_length=max_length) + return tokenizer(new_message, max_length=max_length) else: if self.config.get("is_base_model"): if custom_chat_template: @@ -173,7 +173,7 @@ def tokenize_function(examples): tokenize=False, max_length=max_length, ) - return new_tokenizer + return tokenizer(new_tokenizer, max_length=max_length) tokenized_datasets = dataset.map( tokenize_function, From e2193cab144164305c4ed30ab4d9f9707ac99de0 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Tue, 9 Apr 2024 07:10:41 +0000 Subject: [PATCH 08/24] 1. remove is_base_model tag 2. modify chat template Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 72 ++++++++----------- llm_on_ray/finetune/finetune.py | 4 +- llm_on_ray/finetune/finetune.yaml | 1 - llm_on_ray/finetune/finetune_config.py | 22 ++++-- llm_on_ray/finetune/models/mpt-7b.yaml | 1 + 5 files changed, 48 insertions(+), 52 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index f752dc9ac..3c39eb429 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -102,7 +102,6 @@ class GeneralProcesser(DataProcesser): def tokenize_dataset(self, tokenizer, dataset): max_length = self.config.get("max_length") custom_chat_template = self.config.get("custom_chat_template") - model_default_chat_template = self.config.get("model_default_chat_template") group = self.config.get("group") block_size = self.config.get("block_size") @@ -133,46 +132,37 @@ def tokenize_function(examples): ) return tokenizer(new_message, max_length=max_length) else: - if self.config.get("is_base_model"): - if custom_chat_template: - tokenizer.chat_template = custom_chat_template - new_tokenizer = tokenizer.apply_chat_template( - examples, - tokenize=False, - max_length=max_length, - ) - else: - tokenizer.chat_template = self.config.get("default_chat_template") - new_tokenizer = tokenizer.apply_chat_template( - examples, - tokenize=False, - max_length=max_length, - ) + new_messages = [ + { + "role": "user", + "content": INTRO_BLURB + "\n\n" + + "###Instruction:\n" + + examples["instruction"] + "\n\n" + + "###context:\n" + + examples["context"] + "\n\n", + }, + {"role": "assistant", "content": examples["response"]}, + ] + if custom_chat_template: + tokenizer.chat_template = custom_chat_template + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + 
tokenize=False, + max_length=max_length, + ) + elif tokenizer.chat_template is not None: + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + tokenize=False, + max_length=max_length, + ) else: - if model_default_chat_template: - tokenizer.chat_template = model_default_chat_template - new_tokenizer = tokenizer.apply_chat_template( - examples, - tokenize=False, - max_length=max_length, - ) - else: - new_messages = [ - { - "role": "user", - "content": "instruction: " - + examples["instruction"] - + " context: " - + examples["context"], - }, - {"role": "assistant", "content": "response: " + examples["response"]}, - ] - - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - max_length=max_length, - ) + tokenizer.chat_template = self.config.get("chat_template") + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + tokenize=False, + max_length=max_length, + ) return tokenizer(new_tokenizer, max_length=max_length) tokenized_datasets = dataset.map( @@ -194,7 +184,7 @@ def group_texts(examples): total_length = (total_length // block_size) * block_size # Split by chunks of max_len. result = { - k: [t[i : i + block_size] for i in range(0, total_length, block_size)] + k: [t[i: i + block_size] for i in range(0, total_length, block_size)] for k, t in concatenated_examples.items() } result["labels"] = result["input_ids"].copy() diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index fdd079838..5bcff4ad9 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -301,10 +301,8 @@ def train_func(config: Dict[str, Any]): "block_size": config["Dataset"].get("block_size", 512), "shuffle": config["Dataset"].get("shuffle", False), "gpt_base_model": config["General"].get("gpt_base_model", False), - "is_base_model": config["General"]["is_base_model"], "custom_chat_template": config["General"]["custom_chat_template"], - "default_chat_template": config["General"]["default_chat_template"], - "model_default_chat_template": config["General"]["model_default_chat_template"], + "chat_template": config["General"]["chat_template"], }, "lr_scheduler": { "enable": True, diff --git a/llm_on_ray/finetune/finetune.yaml b/llm_on_ray/finetune/finetune.yaml index 3baf110c3..15b38501f 100644 --- a/llm_on_ray/finetune/finetune.yaml +++ b/llm_on_ray/finetune/finetune.yaml @@ -1,6 +1,5 @@ General: base_model: EleutherAI/gpt-j-6b - is_base_model: false gpt_base_model: true output_dir: /tmp/llm-ray/output save_strategy: no diff --git a/llm_on_ray/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py index e5cd5dcf7..11ae93ce0 100644 --- a/llm_on_ray/finetune/finetune_config.py +++ b/llm_on_ray/finetune/finetune_config.py @@ -61,14 +61,22 @@ class General(BaseModel): deltatuner_config: Optional[DeltatunerConfig] = None enable_gradient_checkpointing: bool = False custom_chat_template: Optional[str] = None - default_chat_template: str = ( - "{{'### Below is an instruction that describes a task." - "Write a response that appropriately completes the request. 
'}}" - "{{'### Instruction: ' + messages['instruction'] " - "+ ' Input:' + messages['context'] + ' ### Response:' + messages['response'] " - "+ '### End \n'}}" + chat_template: Optional[str] = ( + "{{ bos_token }}" + "{% if messages[0]['role'] == 'system' %}" + "{{ raise_exception('System role not supported') }}" + "{% endif %}" + "{% for message in messages %}" + "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}" + "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}" + "{% endif %}" + "{% if message['role'] == 'user' %}" + "{{ '### Instruction: ' + message['content'] + eos_token }}" + "{% elif message['role'] == 'assistant' %}" + "{{ '### Response:' + message['content'] + eos_token }}" + "{% endif %}{% endfor %}" + "{{'### End \n'}}" ) - model_default_chat_template: Optional[str] = None class Dataset(BaseModel): diff --git a/llm_on_ray/finetune/models/mpt-7b.yaml b/llm_on_ray/finetune/models/mpt-7b.yaml index ef2efa006..8e719f186 100644 --- a/llm_on_ray/finetune/models/mpt-7b.yaml +++ b/llm_on_ray/finetune/models/mpt-7b.yaml @@ -1,6 +1,7 @@ General: base_model: mosaicml/mpt-7b tokenizer_name: EleutherAI/gpt-neox-20b + is_base_model: false gpt_base_model: false output_dir: /tmp/llm-ray/output save_strategy: no From 1090bf0ec5168bbf06e4d5fa0057cbfc81dcc693 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Wed, 10 Apr 2024 01:25:29 +0000 Subject: [PATCH 09/24] update Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 118 +++++++++--------- pyproject.toml | 3 +- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 3c39eb429..5782fecd9 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -99,9 +99,67 @@ def torch_call(self, examples): class GeneralProcesser(DataProcesser): - def tokenize_dataset(self, tokenizer, dataset): - max_length = self.config.get("max_length") - custom_chat_template = self.config.get("custom_chat_template") + def tokenize_function(self, examples, tokenizer): + print(examples) + if self.config.get("gpt_base_model"): + instruction = examples["instruction"] + response = examples["response"] + context = examples.get("context") + if not instruction: + raise ValueError(f"Expected an instruction in: {examples}") + if not response: + raise ValueError(f"Expected a response in: {examples}") + if context: + new_message = PROMPT_WITH_INPUT_FORMAT.format( + instruction=instruction, response=response, input=context + ) + else: + new_message = PROMPT_NO_INPUT_FORMAT.format( + instruction=instruction, response=response + ) + return tokenizer(new_message, max_length=self.config.get("max_length")) + else: + new_messages = [ + { + "role": "user", + "content": "###Instruction:\n" + + examples["instruction"] + "\n\n" + + "###context:\n" + + examples["context"] + "\n\n", + }, + {"role": "assistant", "content": examples["response"] + "\n\n"}, + ] + print(new_messages) + if self.config.get("custom_chat_template") is not None: + print("custom_chat_template") + tokenizer.chat_template = self.config.get("custom_chat_template") + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + tokenize=False, + max_length=self.config.get("max_length"), + ) + elif tokenizer.chat_template is not None: + print("tokenizer.chat_template") + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + tokenize=False, + 
max_length=self.config.get("max_length"), + ) + else: + print("chat_template") + tokenizer.chat_template = self.config.get("chat_template") + new_tokenizer = tokenizer.apply_chat_template( + new_messages, + tokenize=False, + max_length=self.config.get("max_length"), + ) + tokenizer = tokenizer(new_tokenizer, max_length=self.config.get("max_length")) + print(tokenizer) + return tokenizer + + def prepare(self, tokenizer, dataset): + per_device_train_batch_size = self.config.get("per_device_train_batch_size") + per_device_eval_batch_size = self.config.get("per_device_eval_batch_size") group = self.config.get("group") block_size = self.config.get("block_size") @@ -113,60 +171,8 @@ def tokenize_dataset(self, tokenizer, dataset): if isinstance(dataset, datasets.DatasetDict): column_names = dataset["train"].column_names - def tokenize_function(examples): - if self.config.get("gpt_base_model"): - instruction = examples["instruction"] - response = examples["response"] - context = examples.get("context") - if not instruction: - raise ValueError(f"Expected an instruction in: {examples}") - if not response: - raise ValueError(f"Expected a response in: {examples}") - if context: - new_message = PROMPT_WITH_INPUT_FORMAT.format( - instruction=instruction, response=response, input=context - ) - else: - new_message = PROMPT_NO_INPUT_FORMAT.format( - instruction=instruction, response=response - ) - return tokenizer(new_message, max_length=max_length) - else: - new_messages = [ - { - "role": "user", - "content": INTRO_BLURB + "\n\n" - + "###Instruction:\n" - + examples["instruction"] + "\n\n" - + "###context:\n" - + examples["context"] + "\n\n", - }, - {"role": "assistant", "content": examples["response"]}, - ] - if custom_chat_template: - tokenizer.chat_template = custom_chat_template - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - max_length=max_length, - ) - elif tokenizer.chat_template is not None: - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - max_length=max_length, - ) - else: - tokenizer.chat_template = self.config.get("chat_template") - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - max_length=max_length, - ) - return tokenizer(new_tokenizer, max_length=max_length) - tokenized_datasets = dataset.map( - tokenize_function, + lambda examples: self.tokenize_function(examples, tokenizer), remove_columns=column_names, load_from_cache_file=False, desc="Tokenize dataset", diff --git a/pyproject.toml b/pyproject.toml index b319045cc..a18574675 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,8 @@ dependencies = [ "py-cpuinfo", "pydantic-yaml", "async_timeout", - "typer" + "typer", + "jinja2>=3.0.0" ] [project.optional-dependencies] From 6bdd664eab764fb20025e210aed206fead3cb302 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Wed, 10 Apr 2024 08:32:22 +0000 Subject: [PATCH 10/24] 1. update doc/finetune_parameters.md 2. add unit test Signed-off-by: minmingzhu --- docs/finetune_parameters.md | 2 + .../common/dataprocesser/general_processer.py | 15 +- tests/finetune/test_chat_template.py | 139 ++++++++++++++++++ 3 files changed, 145 insertions(+), 11 deletions(-) create mode 100644 tests/finetune/test_chat_template.py diff --git a/docs/finetune_parameters.md b/docs/finetune_parameters.md index 4f113e69f..d80b6e46d 100644 --- a/docs/finetune_parameters.md +++ b/docs/finetune_parameters.md @@ -16,6 +16,8 @@ The following are the parameters supported in the finetuning workflow. 
|lora_config|task_type: CAUSAL_LM<br>r: 8<br>lora_alpha: 32<br>lora_dropout: 0.1|Will be passed to the LoraConfig `__init__()` method, then it'll be used as config to build Peft model object.|
|deltatuner_config|"algo": "lora"<br>"denas": True<br>
"best_model_structure": "/path/to/best_structure_of_deltatuner_model"|Will be passed to the DeltaTunerArguments `__init__()` method, then it'll be used as config to build [Deltatuner model](https://github.com/intel/e2eAIOK/tree/main/e2eAIOK/deltatuner) object.| |enable_gradient_checkpointing|False|enable gradient checkpointing to save GPU memory, but will cost more compute runtime| +|chat_template|"{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + eos_token }}{{ '### Response:' + message['content'] + eos_token }}{% endif %}{% endfor %}{{'### End \n'}}"|LLM-on-Ray default chat default.| +|custom_chat_template|None|User-defined chat template.| ## Dataset Parameters diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 5782fecd9..64b0c75d3 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -100,7 +100,6 @@ def torch_call(self, examples): class GeneralProcesser(DataProcesser): def tokenize_function(self, examples, tokenizer): - print(examples) if self.config.get("gpt_base_model"): instruction = examples["instruction"] response = examples["response"] @@ -117,7 +116,7 @@ def tokenize_function(self, examples, tokenizer): new_message = PROMPT_NO_INPUT_FORMAT.format( instruction=instruction, response=response ) - return tokenizer(new_message, max_length=self.config.get("max_length")) + return tokenizer(new_message, add_special_tokens=False, max_length=self.config.get("max_length")) else: new_messages = [ { @@ -129,32 +128,26 @@ def tokenize_function(self, examples, tokenizer): }, {"role": "assistant", "content": examples["response"] + "\n\n"}, ] - print(new_messages) if self.config.get("custom_chat_template") is not None: - print("custom_chat_template") tokenizer.chat_template = self.config.get("custom_chat_template") new_tokenizer = tokenizer.apply_chat_template( new_messages, tokenize=False, - max_length=self.config.get("max_length"), ) elif tokenizer.chat_template is not None: - print("tokenizer.chat_template") new_tokenizer = tokenizer.apply_chat_template( new_messages, tokenize=False, - max_length=self.config.get("max_length"), ) else: - print("chat_template") tokenizer.chat_template = self.config.get("chat_template") new_tokenizer = tokenizer.apply_chat_template( new_messages, tokenize=False, - max_length=self.config.get("max_length"), ) - tokenizer = tokenizer(new_tokenizer, max_length=self.config.get("max_length")) - print(tokenizer) + tokenizer = tokenizer(new_tokenizer, + add_special_tokens=False, + max_length=self.config.get("max_length")) return tokenizer def prepare(self, tokenizer, dataset): diff --git a/tests/finetune/test_chat_template.py b/tests/finetune/test_chat_template.py new file mode 100644 index 000000000..7cdda115c --- /dev/null +++ b/tests/finetune/test_chat_template.py @@ -0,0 +1,139 @@ +import unittest + +import transformers +from transformers import AutoTokenizer +from llm_on_ray.common.dataprocesser.general_processer import GeneralProcesser + + +class TestTokenizeFunction(unittest.TestCase): + def setUp(self): + self.tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf') + 
self.config = { + 'gpt_base_model': True, + 'max_length': 512, + 'trust_remote_code': False, + 'chat_template': "Below is an instruction that describes a task. Write a response that appropriately " + "completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception(" + "'System role not supported') }}{% endif %}{% for message in messages %}{% if (message[" + "'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles " + "must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] " + "== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == " + "'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### " + "End \n'}}", + } + self.processer = GeneralProcesser(self.config) + + def test_tokenize_function_with_gpt_model(self): + self.tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-j-6b') + + examples = \ + { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } + + # Verify the format of the result + expected_result = 'Below is an instruction that describes a task. Write a response that '\ + 'appropriately completes the request.\n'\ + '\n'\ + '### Instruction:\n'\ + 'Test instruction\n'\ + '\n'\ + 'Input:\n'\ + 'Test context\n'\ + '\n'\ + '### Response:\n'\ + 'Test response\n'\ + '\n'\ + '### End' + + result = self.processer.tokenize_function(examples, self.tokenizer) + self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + + def test_tokenize_function_with_custom_chat_template(self): + examples = \ + { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } + + # Verify the format of the result + expected_result = '<|im_start|>user\n' \ + '###Instruction:\n' \ + 'Test instruction\n' \ + '\n' \ + '###context:\n' \ + 'Test context\n' \ + '\n' \ + '<|im_end|><|im_start|>assistant\n' \ + 'Test response\n' \ + '\n' \ + '<|im_end|>' + # Set custom chat template + self.config['custom_chat_template'] = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n'"\ + "+ message['content'] + '<|im_end|>'}}{% endfor %}" + + self.config['gpt_base_model'] = False + result = self.processer.tokenize_function(examples, self.tokenizer) + self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + + def test_tokenize_function_with_chat_template(self): + examples = \ + { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } + + # Verify the format of the result + expected_result = 'Below is an instruction that describes a task. 
Write a response that '\ + 'appropriately completes the request\n'\ + '### Instruction: ###Instruction:\n'\ + 'Test instruction\n'\ + '\n'\ + '###context:\n'\ + 'Test context\n'\ + '\n'\ + '### Response: Test response\n'\ + '\n'\ + '### End \n'\ + + self.config['gpt_base_model'] = False + result = self.processer.tokenize_function(examples, self.tokenizer) + self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + + def test_tokenize_function_with_default_chat_template(self): + self.tokenizer = AutoTokenizer.from_pretrained('google/gemma-2b-it') + examples = \ + { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } + + chat_example = [ + { + "role": "user", + "content": "###Instruction:\nTest instruction\n\n###context:\nTest context\n\n", + + }, + { + "role": "assistant", + "content": "Test response\n\n", + } + ] + + # Verify the format of the result + expected_result = self.tokenizer.apply_chat_template(chat_example, + tokenize=False, + max_length=self.config.get("max_length")) + + self.config['gpt_base_model'] = False + result = self.processer.tokenize_function(examples, self.tokenizer) + self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + + +if __name__ == '__main__': + unittest.main() From 4f0d118bb131a1cc64026525d4b88f33bdadfc48 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Wed, 10 Apr 2024 08:44:21 +0000 Subject: [PATCH 11/24] update Signed-off-by: minmingzhu --- docs/finetune_parameters.md | 3 +- .../common/dataprocesser/general_processer.py | 6 +- llm_on_ray/finetune/finetune.py | 55 ++++--------------- llm_on_ray/finetune/finetune_config.py | 4 +- 4 files changed, 16 insertions(+), 52 deletions(-) diff --git a/docs/finetune_parameters.md b/docs/finetune_parameters.md index d80b6e46d..cebb1449e 100644 --- a/docs/finetune_parameters.md +++ b/docs/finetune_parameters.md @@ -16,8 +16,7 @@ The following are the parameters supported in the finetuning workflow. |lora_config|task_type: CAUSAL_LM
r: 8<br>lora_alpha: 32<br>lora_dropout: 0.1|Will be passed to the LoraConfig `__init__()` method, then it'll be used as config to build Peft model object.|
|deltatuner_config|"algo": "lora"<br>"denas": True<br>
"best_model_structure": "/path/to/best_structure_of_deltatuner_model"|Will be passed to the DeltaTunerArguments `__init__()` method, then it'll be used as config to build [Deltatuner model](https://github.com/intel/e2eAIOK/tree/main/e2eAIOK/deltatuner) object.| |enable_gradient_checkpointing|False|enable gradient checkpointing to save GPU memory, but will cost more compute runtime| -|chat_template|"{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + eos_token }}{{ '### Response:' + message['content'] + eos_token }}{% endif %}{% endfor %}{{'### End \n'}}"|LLM-on-Ray default chat default.| -|custom_chat_template|None|User-defined chat template.| +|chat_template|None|User-defined chat template.| ## Dataset Parameters diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 64b0c75d3..636ec006c 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -128,8 +128,8 @@ def tokenize_function(self, examples, tokenizer): }, {"role": "assistant", "content": examples["response"] + "\n\n"}, ] - if self.config.get("custom_chat_template") is not None: - tokenizer.chat_template = self.config.get("custom_chat_template") + if self.config.get("chat_template") is not None: + tokenizer.chat_template = self.config.get("chat_template") new_tokenizer = tokenizer.apply_chat_template( new_messages, tokenize=False, @@ -140,7 +140,7 @@ def tokenize_function(self, examples, tokenizer): tokenize=False, ) else: - tokenizer.chat_template = self.config.get("chat_template") + tokenizer.chat_template = self.config.get("default_chat_template") new_tokenizer = tokenizer.apply_chat_template( new_messages, tokenize=False, diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index 5bcff4ad9..85a678553 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -14,7 +14,7 @@ # limitations under the License. 
# -#!/usr/bin/env python +# !/usr/bin/env python import os import argparse @@ -283,49 +283,6 @@ def train_func(config: Dict[str, Any]): tokenizer=tokenizer, data_collator=data_collator, ) - trainer = common.trainer.Trainer.registory.get("DefaultTrainer")( - config={ - "device": config["Training"]["device"], - "accelerate_mode": config["Training"]["accelerate_mode"], - "num_train_epochs": epochs, - "max_train_steps": config["Training"].get("max_train_steps", None), - "logging_steps": config["Training"].get("logging_steps", 1), - "output": output_dir, - "dataprocesser": { - "type": "GeneralProcesser", - "per_device_train_batch_size": config["Training"]["batch_size"], - "per_device_eval_batch_size": config["Training"]["batch_size"], - "preprocessing_num_workers": config["Dataset"].get("preprocessing_num_workers", 1), - "max_length": config["Dataset"].get("max_length", 512), - "group": config["Dataset"].get("group", True), - "block_size": config["Dataset"].get("block_size", 512), - "shuffle": config["Dataset"].get("shuffle", False), - "gpt_base_model": config["General"].get("gpt_base_model", False), - "custom_chat_template": config["General"]["custom_chat_template"], - "chat_template": config["General"]["chat_template"], - }, - "lr_scheduler": { - "enable": True, - "max_train_steps": None, - "lr_scheduler_type": config["Training"]["lr_scheduler"], - "num_warmup_steps": 0, - "learning_rate": config["Training"]["learning_rate"], - "weight_decay": config["Training"]["weight_decay"], - }, - "checkpoint": { - "root_path": config["General"].get("checkpoint_dir", None), - }, - } - ) - - try: - common.logger.info("trainer prepare start") - model.training = True - trainer.prepare(model, tokenizer, datasets, optimizer, accelerator) - except Exception as e: - common.logger.critical(e, exc_info=True) - exit(1) - common.logger.info("trainer prepare finish") common.logger.info("train start") trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) @@ -401,7 +358,15 @@ def main(external_config=None): ) # additional 1 for head worker ray.init(num_cpus=num_cpus, runtime_env=runtime_env) else: - ray.init(runtime_env=runtime_env) + import intel_extension_for_pytorch as ipex + + if "xpu" in ipex.__version__: + num_cpus = ( + resources_per_worker["CPU"] * num_training_workers + 1 + ) # additional 1 for head worker + ray.init(num_cpus=num_cpus, runtime_env=runtime_env) + else: + ray.init(runtime_env=runtime_env) common.logger.info(f"ray available resources = {ray.available_resources()}") use_gpu = True if device == "gpu" else False diff --git a/llm_on_ray/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py index 11ae93ce0..bbbb916af 100644 --- a/llm_on_ray/finetune/finetune_config.py +++ b/llm_on_ray/finetune/finetune_config.py @@ -60,8 +60,8 @@ class General(BaseModel): lora_config: Optional[LoraConfig] = None deltatuner_config: Optional[DeltatunerConfig] = None enable_gradient_checkpointing: bool = False - custom_chat_template: Optional[str] = None - chat_template: Optional[str] = ( + chat_template: Optional[str] = None + default_chat_template: str = ( "{{ bos_token }}" "{% if messages[0]['role'] == 'system' %}" "{{ raise_exception('System role not supported') }}" From e08a93c18cbeca8227c6d3d7fba38d01fc7372c3 Mon Sep 17 00:00:00 2001 From: Xiaochang Wu Date: Tue, 9 Apr 2024 09:19:24 +0800 Subject: [PATCH 12/24] Support latest Ray 2.10 release (#158) * update * fix blocking * update Signed-off-by: Wu, Xiaochang * update Signed-off-by: Wu, Xiaochang * fix setup and getting started 
Signed-off-by: Wu, Xiaochang * update Signed-off-by: Wu, Xiaochang * update Signed-off-by: Wu, Xiaochang * nit Signed-off-by: Wu, Xiaochang * Add dependencies for tests and update pyproject.toml Signed-off-by: Wu, Xiaochang * Update dependencies and test workflow Signed-off-by: Wu, Xiaochang * Update dependencies and fix torch_dist.py Signed-off-by: Wu, Xiaochang * Update OpenAI SDK installation and start ray cluster Signed-off-by: Wu, Xiaochang --------- Signed-off-by: Wu, Xiaochang --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a18574675..451d2649d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,9 +34,9 @@ dependencies = [ "deltatuner==1.1.9", "py-cpuinfo", "pydantic-yaml", - "async_timeout", - "typer", - "jinja2>=3.0.0" + "async-timeout", + "jinja2>=3.0.0", + "typer" ] [project.optional-dependencies] From 1bbaf2285683a48e879a9e7aee196fabd8ee6ad7 Mon Sep 17 00:00:00 2001 From: yutianchen Date: Tue, 9 Apr 2024 15:38:35 +0800 Subject: [PATCH 13/24] [Tests] Add query single test (#156) * single test * single test * single test * single test * fix hang error --- tests/inference/test_query_single.py | 107 +++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 tests/inference/test_query_single.py diff --git a/tests/inference/test_query_single.py b/tests/inference/test_query_single.py new file mode 100644 index 000000000..1c32f6b73 --- /dev/null +++ b/tests/inference/test_query_single.py @@ -0,0 +1,107 @@ +import subprocess +import pytest +import os + +os.environ["no_proxy"] = "localhost,127.0.0.1" + + +def start_serve(model_name): + current_path = os.path.dirname(os.path.abspath(__file__)) + + config_path = os.path.join( + current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml" + ) + + cmd_serve = ["llm_on_ray-serve", "--config_file", config_path, "--simple"] + + result_serve = subprocess.run(cmd_serve, capture_output=True, text=True) + + # Ensure there are no errors in the serve script execution + assert result_serve.returncode == 0, print( + "\n" + "Serve error stderr message: " + "\n", result_serve.stderr + ) + + # Print the output of subprocess.run for checking if output is expected + print("\n" + "Serve message: " + "\n", result_serve.stdout) + + # Ensure there are no errors in the serve script execution + assert "Error" not in result_serve.stderr + + +def script_with_args( + base_url, model_name, streaming_response, max_new_tokens, temperature, top_p, top_k +): + current_path = os.path.dirname(os.path.abspath(__file__)) + + os.path.join(current_path, "../../.github/workflows/config/" + model_name + "-ci.yaml") + + example_query_single_path = os.path.join( + current_path, "../../examples/inference/api_server_simple/query_single.py" + ) + + cmd_single = [ + "python", + example_query_single_path, + "--model_endpoint", + base_url + model_name, + ] + + if streaming_response: + cmd_single.append("--streaming_response") + + if max_new_tokens is not None: + cmd_single.extend(["--max_new_tokens", str(max_new_tokens)]) + + if temperature is not None: + cmd_single.extend(["--temperature", str(temperature)]) + + if top_p is not None: + cmd_single.extend(["--top_p", str(top_p)]) + + if top_k is not None: + cmd_single.extend(["--top_k", str(top_k)]) + + result_query_single = subprocess.run(cmd_single, capture_output=True, text=True) + + # Print the output of subprocess.run for checking if output is expected + print(result_query_single) + + # Ensure there are no errors 
in the OpenAI API query script execution + assert "Error" not in result_query_single.stderr + + # Returncode should be 0 when there is no exception + assert result_query_single.returncode == 0 + + +executed_models = {} + + +# Parametrize the test function with different combinations of parameters +# TODO: more models and combinations will be added and tested. +@pytest.mark.parametrize( + "base_url,model_name,streaming_response,max_new_tokens,temperature,top_p, top_k", + [ + (base_url, model_name, streaming_response, max_new_tokens, temperature, top_p, top_k) + for base_url in ["http://localhost:8000/"] + for model_name in ["gpt2"] + for streaming_response in [None] + for max_new_tokens in [None] + for temperature in [None] + for top_p in [None] + for top_k in [None] + ], +) +def test_script( + base_url, model_name, streaming_response, max_new_tokens, temperature, top_p, top_k +): + global executed_models + + # Check if this modelname has already executed start_serve + if model_name not in executed_models: + start_serve(model_name) + # Mark this modelname has already executed start_serve + executed_models[model_name] = True + + script_with_args( + base_url, model_name, streaming_response, max_new_tokens, temperature, top_p, top_k + ) From 9498efe9dae2d6547d15d1fd551eda8f2ead0cd0 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Wed, 10 Apr 2024 08:50:32 +0000 Subject: [PATCH 14/24] format Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 20 +- llm_on_ray/finetune/finetune.py | 2 +- tests/finetune/test_chat_template.py | 180 +++++++++--------- 3 files changed, 104 insertions(+), 98 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 636ec006c..1dc953d27 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -116,15 +116,19 @@ def tokenize_function(self, examples, tokenizer): new_message = PROMPT_NO_INPUT_FORMAT.format( instruction=instruction, response=response ) - return tokenizer(new_message, add_special_tokens=False, max_length=self.config.get("max_length")) + return tokenizer( + new_message, add_special_tokens=False, max_length=self.config.get("max_length") + ) else: new_messages = [ { "role": "user", "content": "###Instruction:\n" - + examples["instruction"] + "\n\n" - + "###context:\n" - + examples["context"] + "\n\n", + + examples["instruction"] + + "\n\n" + + "###context:\n" + + examples["context"] + + "\n\n", }, {"role": "assistant", "content": examples["response"] + "\n\n"}, ] @@ -145,9 +149,9 @@ def tokenize_function(self, examples, tokenizer): new_messages, tokenize=False, ) - tokenizer = tokenizer(new_tokenizer, - add_special_tokens=False, - max_length=self.config.get("max_length")) + tokenizer = tokenizer( + new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length") + ) return tokenizer def prepare(self, tokenizer, dataset): @@ -183,7 +187,7 @@ def group_texts(examples): total_length = (total_length // block_size) * block_size # Split by chunks of max_len. 
result = { - k: [t[i: i + block_size] for i in range(0, total_length, block_size)] + k: [t[i : i + block_size] for i in range(0, total_length, block_size)] for k, t in concatenated_examples.items() } result["labels"] = result["input_ids"].copy() diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index 85a678553..ae2e36c87 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -362,7 +362,7 @@ def main(external_config=None): if "xpu" in ipex.__version__: num_cpus = ( - resources_per_worker["CPU"] * num_training_workers + 1 + resources_per_worker["CPU"] * num_training_workers + 1 ) # additional 1 for head worker ray.init(num_cpus=num_cpus, runtime_env=runtime_env) else: diff --git a/tests/finetune/test_chat_template.py b/tests/finetune/test_chat_template.py index 7cdda115c..2270a5781 100644 --- a/tests/finetune/test_chat_template.py +++ b/tests/finetune/test_chat_template.py @@ -7,133 +7,135 @@ class TestTokenizeFunction(unittest.TestCase): def setUp(self): - self.tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf') + self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf") self.config = { - 'gpt_base_model': True, - 'max_length': 512, - 'trust_remote_code': False, - 'chat_template': "Below is an instruction that describes a task. Write a response that appropriately " - "completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception(" - "'System role not supported') }}{% endif %}{% for message in messages %}{% if (message[" - "'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles " - "must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] " - "== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == " - "'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### " - "End \n'}}", + "gpt_base_model": True, + "max_length": 512, + "trust_remote_code": False, + "chat_template": "Below is an instruction that describes a task. Write a response that appropriately " + "completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception(" + "'System role not supported') }}{% endif %}{% for message in messages %}{% if (message[" + "'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles " + "must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] " + "== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == " + "'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### " + "End \n'}}", } self.processer = GeneralProcesser(self.config) def test_tokenize_function_with_gpt_model(self): - self.tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-j-6b') + self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") - examples = \ - { - "instruction": "Test instruction", - "response": "Test response", - "context": "Test context", - } + examples = { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } # Verify the format of the result - expected_result = 'Below is an instruction that describes a task. 
Write a response that '\ - 'appropriately completes the request.\n'\ - '\n'\ - '### Instruction:\n'\ - 'Test instruction\n'\ - '\n'\ - 'Input:\n'\ - 'Test context\n'\ - '\n'\ - '### Response:\n'\ - 'Test response\n'\ - '\n'\ - '### End' + expected_result = ( + "Below is an instruction that describes a task. Write a response that " + "appropriately completes the request.\n" + "\n" + "### Instruction:\n" + "Test instruction\n" + "\n" + "Input:\n" + "Test context\n" + "\n" + "### Response:\n" + "Test response\n" + "\n" + "### End" + ) result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) def test_tokenize_function_with_custom_chat_template(self): - examples = \ - { - "instruction": "Test instruction", - "response": "Test response", - "context": "Test context", - } + examples = { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } # Verify the format of the result - expected_result = '<|im_start|>user\n' \ - '###Instruction:\n' \ - 'Test instruction\n' \ - '\n' \ - '###context:\n' \ - 'Test context\n' \ - '\n' \ - '<|im_end|><|im_start|>assistant\n' \ - 'Test response\n' \ - '\n' \ - '<|im_end|>' + expected_result = ( + "<|im_start|>user\n" + "###Instruction:\n" + "Test instruction\n" + "\n" + "###context:\n" + "Test context\n" + "\n" + "<|im_end|><|im_start|>assistant\n" + "Test response\n" + "\n" + "<|im_end|>" + ) # Set custom chat template - self.config['custom_chat_template'] = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n'"\ - "+ message['content'] + '<|im_end|>'}}{% endfor %}" + self.config["custom_chat_template"] = ( + "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n'" + "+ message['content'] + '<|im_end|>'}}{% endfor %}" + ) - self.config['gpt_base_model'] = False + self.config["gpt_base_model"] = False result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) def test_tokenize_function_with_chat_template(self): - examples = \ - { - "instruction": "Test instruction", - "response": "Test response", - "context": "Test context", - } + examples = { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } # Verify the format of the result - expected_result = 'Below is an instruction that describes a task. Write a response that '\ - 'appropriately completes the request\n'\ - '### Instruction: ###Instruction:\n'\ - 'Test instruction\n'\ - '\n'\ - '###context:\n'\ - 'Test context\n'\ - '\n'\ - '### Response: Test response\n'\ - '\n'\ - '### End \n'\ - - self.config['gpt_base_model'] = False + expected_result = ( + "Below is an instruction that describes a task. 
Write a response that " + "appropriately completes the request\n" + "### Instruction: ###Instruction:\n" + "Test instruction\n" + "\n" + "###context:\n" + "Test context\n" + "\n" + "### Response: Test response\n" + "\n" + "### End \n" + ) + self.config["gpt_base_model"] = False result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) def test_tokenize_function_with_default_chat_template(self): - self.tokenizer = AutoTokenizer.from_pretrained('google/gemma-2b-it') - examples = \ - { - "instruction": "Test instruction", - "response": "Test response", - "context": "Test context", - } + self.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it") + examples = { + "instruction": "Test instruction", + "response": "Test response", + "context": "Test context", + } chat_example = [ { "role": "user", "content": "###Instruction:\nTest instruction\n\n###context:\nTest context\n\n", - }, { "role": "assistant", "content": "Test response\n\n", - } + }, ] # Verify the format of the result - expected_result = self.tokenizer.apply_chat_template(chat_example, - tokenize=False, - max_length=self.config.get("max_length")) + expected_result = self.tokenizer.apply_chat_template( + chat_example, tokenize=False, max_length=self.config.get("max_length") + ) - self.config['gpt_base_model'] = False + self.config["gpt_base_model"] = False result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result['input_ids']), expected_result) + self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() From 115c513fb9b9648b374d102d6e5b23479da0be71 Mon Sep 17 00:00:00 2001 From: minmingzhu <45281494+minmingzhu@users.noreply.github.com> Date: Wed, 10 Apr 2024 11:22:03 +0000 Subject: [PATCH 15/24] [Finetune] use base model mpt-7b instead of mpt-7b-chat (#181) * use base model mpt-7b instead of mpt-7b-chat Signed-off-by: minmingzhu * manual setting specify tokenizer Signed-off-by: minmingzhu * update Signed-off-by: minmingzhu * update doc/finetune_parameters.md Signed-off-by: minmingzhu --------- Signed-off-by: minmingzhu --- llm_on_ray/finetune/models/mpt-7b.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/llm_on_ray/finetune/models/mpt-7b.yaml b/llm_on_ray/finetune/models/mpt-7b.yaml index 8e719f186..ef2efa006 100644 --- a/llm_on_ray/finetune/models/mpt-7b.yaml +++ b/llm_on_ray/finetune/models/mpt-7b.yaml @@ -1,7 +1,6 @@ General: base_model: mosaicml/mpt-7b tokenizer_name: EleutherAI/gpt-neox-20b - is_base_model: false gpt_base_model: false output_dir: /tmp/llm-ray/output save_strategy: no From cfa3064847b65ee88d302833aa23e5de8f8defa2 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 22 Apr 2024 06:27:14 +0000 Subject: [PATCH 16/24] fix license issues Signed-off-by: minmingzhu --- tests/finetune/test_chat_template.py | 15 +++++++++++++++ tests/inference/test_query_single.py | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tests/finetune/test_chat_template.py b/tests/finetune/test_chat_template.py index 2270a5781..a416d8f7b 100644 --- a/tests/finetune/test_chat_template.py +++ b/tests/finetune/test_chat_template.py @@ -1,3 +1,18 @@ +# +# Copyright 2023 The LLM-on-Ray Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import unittest import transformers diff --git a/tests/inference/test_query_single.py b/tests/inference/test_query_single.py index 1c32f6b73..d48727a30 100644 --- a/tests/inference/test_query_single.py +++ b/tests/inference/test_query_single.py @@ -1,3 +1,19 @@ +# +# Copyright 2023 The LLM-on-Ray Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import subprocess import pytest import os From c0e4d2d127c6e5df1716cebe338c5b249f55b783 Mon Sep 17 00:00:00 2001 From: minmingzhu <45281494+minmingzhu@users.noreply.github.com> Date: Mon, 22 Apr 2024 14:18:31 +0800 Subject: [PATCH 17/24] Update finetune.yaml --- llm_on_ray/finetune/finetune.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/llm_on_ray/finetune/finetune.yaml b/llm_on_ray/finetune/finetune.yaml index 15b38501f..627a88753 100644 --- a/llm_on_ray/finetune/finetune.yaml +++ b/llm_on_ray/finetune/finetune.yaml @@ -12,7 +12,6 @@ General: lora_alpha: 32 lora_dropout: 0.1 enable_gradient_checkpointing: false - custom_chat_template: null Dataset: train_file: examples/data/sample_finetune_data_small.jsonl group: true From b24c9f0188363c01650bda61d8fa0567da5bbd82 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Tue, 14 May 2024 14:08:15 +0800 Subject: [PATCH 18/24] refactor datap rocesser Signed-off-by: minmingzhu --- llm_on_ray/common/dataprocesser/__init__.py | 3 +- .../common/dataprocesser/general_processer.py | 126 +++++++++++++---- llm_on_ray/common/trainer/default_trainer.py | 8 +- tests/finetune/test_chat_template.py | 52 +++---- tests/finetune/test_slimOrca_chat_template.py | 128 ++++++++++++++++++ 5 files changed, 263 insertions(+), 54 deletions(-) create mode 100644 tests/finetune/test_slimOrca_chat_template.py diff --git a/llm_on_ray/common/dataprocesser/__init__.py b/llm_on_ray/common/dataprocesser/__init__.py index 2b5152764..c1bf68ae8 100644 --- a/llm_on_ray/common/dataprocesser/__init__.py +++ b/llm_on_ray/common/dataprocesser/__init__.py @@ -15,7 +15,8 @@ # from llm_on_ray.common.dataprocesser.dataprocesser import DataProcesser -from llm_on_ray.common.dataprocesser.general_processer import GeneralProcesser +from llm_on_ray.common.dataprocesser.general_processer import ChatDataPreprocess +from llm_on_ray.common.dataprocesser.general_processer import SlimOrcaDataPreprocess from llm_on_ray.common.dataprocesser.rm_dataprocesser import RMDataProcesser diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 
1dc953d27..31094aa8b 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -24,9 +24,9 @@ from llm_on_ray.common.dataprocesser import DataProcesser INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request." -INSTRUCTION_KEY = "### Instruction:" -INPUT_KEY = "Input:" -RESPONSE_KEY = "### Response:" +INSTRUCTION_KEY = "### Instruction: " +INPUT_KEY = "Input: " +RESPONSE_KEY = "### Response: " END_KEY = "### End" RESPONSE_KEY_NL = f"{RESPONSE_KEY}\n" @@ -70,6 +70,11 @@ ) TEXT_COLUMN_NAME = "text" +SLIMORCA_PROMPT_DICT = { + "prompt_with_input": ("### System: {system} \n" "### User: {user} \n### Assistant: {gpt}"), + "prompt_without_input": ("### System: {system} \n" "### Assistant: {gpt}"), +} + class DataCollatorForCompletionOnlyLM(transformers.DataCollatorForLanguageModeling): def torch_call(self, examples): @@ -98,8 +103,17 @@ def torch_call(self, examples): return batch -class GeneralProcesser(DataProcesser): - def tokenize_function(self, examples, tokenizer): +class ChatDataPreprocess(DataProcesser): + base_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.\n""" + + def __init__(self, config): + super().__init__(config) + self.prompt_template = self.base_template + self.user = "### Instruction:\n" + self.assistant = "### Response:\n" + self.end = "### End\n" + + def create_data(self, examples): if self.config.get("gpt_base_model"): instruction = examples["instruction"] response = examples["response"] @@ -109,50 +123,49 @@ def tokenize_function(self, examples, tokenizer): if not response: raise ValueError(f"Expected a response in: {examples}") if context: - new_message = PROMPT_WITH_INPUT_FORMAT.format( + new_messages = PROMPT_WITH_INPUT_FORMAT.format( instruction=instruction, response=response, input=context ) else: - new_message = PROMPT_NO_INPUT_FORMAT.format( + new_messages = PROMPT_NO_INPUT_FORMAT.format( instruction=instruction, response=response ) - return tokenizer( - new_message, add_special_tokens=False, max_length=self.config.get("max_length") - ) else: new_messages = [ { "role": "user", - "content": "###Instruction:\n" - + examples["instruction"] + "content": examples["instruction"] + "\n\n" - + "###context:\n" + + INPUT_KEY + examples["context"] + "\n\n", }, {"role": "assistant", "content": examples["response"] + "\n\n"}, ] + + return new_messages + + def tokenize_func(self, tokenizer, message): + if self.config.get("gpt_base_model"): + return tokenizer( + message, add_special_tokens=False, max_length=self.config.get("max_length") + ) + else: if self.config.get("chat_template") is not None: tokenizer.chat_template = self.config.get("chat_template") - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - ) elif tokenizer.chat_template is not None: - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - ) + pass else: tokenizer.chat_template = self.config.get("default_chat_template") - new_tokenizer = tokenizer.apply_chat_template( - new_messages, - tokenize=False, - ) - tokenizer = tokenizer( + + new_tokenizer = tokenizer.apply_chat_template( + message, + tokenize=False, + ) + print(new_tokenizer) + return tokenizer( new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length") ) - return tokenizer def prepare(self, tokenizer, dataset): per_device_train_batch_size = 
self.config.get("per_device_train_batch_size") @@ -169,7 +182,7 @@ def prepare(self, tokenizer, dataset): column_names = dataset["train"].column_names tokenized_datasets = dataset.map( - lambda examples: self.tokenize_function(examples, tokenizer), + lambda examples: self.tokenize_func(tokenizer, self.create_data(examples)), remove_columns=column_names, load_from_cache_file=False, desc="Tokenize dataset", @@ -235,3 +248,60 @@ def prepare_dataloader(self, tokenizer, dataset): } eval_dataloader = torch.utils.data.DataLoader(eval_dataset, **eval_dataloader_params) return train_dataloader, eval_dataloader + + +class SlimOrcaDataPreprocess(ChatDataPreprocess): + chat_template = ( + "{% for message in messages %}" + "{% if message['role'] == 'system' %}" + "{{ '### System: ' + message['content'] }}" + "{% elif message['role'] == 'user' %}" + "{{ '### User: ' + message['content'] }}" + "{% elif message['role'] == 'assistant' %}" + "{{ '### Assistant: ' + message['content'] }}" + "{% endif %}" + "{% endfor %}" + ) + + def __init__(self, config): + super().__init__(config) + self.config["chat_template"] = self.chat_template + self.default_system = "You are a helpful, respectful and honest assistant." + + def create_data(self, data): + examples = {} + conv = data["conversations"] + # system + if conv[0]["from"] != "system": + examples["system"] = self.default_system + start = 0 + elif conv[0]["from"] == "system" and conv[0]["value"] == "": + examples[conv[0]["from"]] = self.default_system + start = 1 + else: + examples[conv[0]["from"]] = conv[0]["value"] + start = 1 + + for j in range(start, len(conv) - 1, 2): + examples[conv[j]["from"]] = conv[j]["value"] + examples[conv[j + 1]["from"]] = conv[j + 1]["value"] + + new_messages = [ + {"role": "system", "content": examples["system"] + "\n"}, + { + "role": "user", + "content": examples["human"] + "\n", + }, + {"role": "assistant", "content": examples["gpt"] + "\n"}, + ] + if self.config.get("gpt_base_model"): + if examples["human"]: + return SLIMORCA_PROMPT_DICT["prompt_with_input"].format( + system=examples["system"], user=examples["human"], gpt=examples["gpt"] + ) + else: + return SLIMORCA_PROMPT_DICT["prompt_with_input"].format( + system=examples["human"], gpt=examples["gpt"] + ) + else: + return new_messages diff --git a/llm_on_ray/common/trainer/default_trainer.py b/llm_on_ray/common/trainer/default_trainer.py index 5509bc3a1..61d9d6015 100644 --- a/llm_on_ray/common/trainer/default_trainer.py +++ b/llm_on_ray/common/trainer/default_trainer.py @@ -37,7 +37,13 @@ def __init__(self, config): self.config = config dataprocesser_config = config.get("dataprocesser") dataprocesser_type = dataprocesser_config.get("type") - Factory = dataprocesser.DataProcesser.registory.get(dataprocesser_type) + if dataprocesser_type == "chat": + Factory = dataprocesser.DataProcesser.registory.get("ChatDataPreprocess") + elif dataprocesser_type == "SlimOrca": + Factory = dataprocesser.DataProcesser.registory.get("SlimOrcaDataPreprocess") + else: + raise ValueError(f"there is no {dataprocesser_type} dataprocesser.") + if Factory is None: raise ValueError(f"there is no {dataprocesser_type} dataprocesser.") self.dataprocesser = Factory(dataprocesser_config) diff --git a/tests/finetune/test_chat_template.py b/tests/finetune/test_chat_template.py index a416d8f7b..4d1217b6c 100644 --- a/tests/finetune/test_chat_template.py +++ b/tests/finetune/test_chat_template.py @@ -17,7 +17,7 @@ import transformers from transformers import AutoTokenizer -from 
llm_on_ray.common.dataprocesser.general_processer import GeneralProcesser +from llm_on_ray.common.dataprocesser.general_processer import ChatDataPreprocess class TestTokenizeFunction(unittest.TestCase): @@ -36,7 +36,7 @@ def setUp(self): "'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### " "End \n'}}", } - self.processer = GeneralProcesser(self.config) + self.processer = ChatDataPreprocess(self.config) def test_tokenize_function_with_gpt_model(self): self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") @@ -52,20 +52,23 @@ def test_tokenize_function_with_gpt_model(self): "Below is an instruction that describes a task. Write a response that " "appropriately completes the request.\n" "\n" - "### Instruction:\n" + "### Instruction: \n" "Test instruction\n" "\n" - "Input:\n" + "Input: \n" "Test context\n" "\n" - "### Response:\n" + "### Response: \n" "Test response\n" "\n" "### End" ) - result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) + print(self.processer.create_data(examples)) + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(examples)) + print(self.tokenizer.decode(result["input_ids"])) + + self.assertEqual(expected_result, self.tokenizer.decode(result["input_ids"])) def test_tokenize_function_with_custom_chat_template(self): examples = { @@ -77,28 +80,30 @@ def test_tokenize_function_with_custom_chat_template(self): # Verify the format of the result expected_result = ( "<|im_start|>user\n" - "###Instruction:\n" "Test instruction\n" "\n" - "###context:\n" - "Test context\n" + "Input: Test context\n" "\n" "<|im_end|><|im_start|>assistant\n" "Test response\n" "\n" "<|im_end|>" ) + + print(expected_result) # Set custom chat template - self.config["custom_chat_template"] = ( + self.config["chat_template"] = ( "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n'" "+ message['content'] + '<|im_end|>'}}{% endfor %}" ) self.config["gpt_base_model"] = False - result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) + print(self.processer.create_data(examples)) + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(examples)) + print(self.tokenizer.decode(result["input_ids"])) + self.assertEqual(expected_result, self.tokenizer.decode(result["input_ids"])) - def test_tokenize_function_with_chat_template(self): + def test_tokenize_function_with_default_chat_template(self): examples = { "instruction": "Test instruction", "response": "Test response", @@ -109,21 +114,19 @@ def test_tokenize_function_with_chat_template(self): expected_result = ( "Below is an instruction that describes a task. 
Write a response that " "appropriately completes the request\n" - "### Instruction: ###Instruction:\n" - "Test instruction\n" + "### Instruction: Test instruction\n" "\n" - "###context:\n" - "Test context\n" + "Input: Test context\n" "\n" "### Response: Test response\n" "\n" "### End \n" ) self.config["gpt_base_model"] = False - result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(examples)) + self.assertEqual(expected_result, self.tokenizer.decode(result["input_ids"])) - def test_tokenize_function_with_default_chat_template(self): + def test_tokenize_function_with_tokenizer_chat_template(self): self.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it") examples = { "instruction": "Test instruction", @@ -134,7 +137,7 @@ def test_tokenize_function_with_default_chat_template(self): chat_example = [ { "role": "user", - "content": "###Instruction:\nTest instruction\n\n###context:\nTest context\n\n", + "content": "Test instruction\n\nInput: Test context\n\n", }, { "role": "assistant", @@ -147,9 +150,10 @@ def test_tokenize_function_with_default_chat_template(self): chat_example, tokenize=False, max_length=self.config.get("max_length") ) + self.config["chat_template"] = None self.config["gpt_base_model"] = False - result = self.processer.tokenize_function(examples, self.tokenizer) - self.assertEqual(self.tokenizer.decode(result["input_ids"]), expected_result) + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(examples)) + self.assertEqual(expected_result, self.tokenizer.decode(result["input_ids"])) if __name__ == "__main__": diff --git a/tests/finetune/test_slimOrca_chat_template.py b/tests/finetune/test_slimOrca_chat_template.py new file mode 100644 index 000000000..059a316d1 --- /dev/null +++ b/tests/finetune/test_slimOrca_chat_template.py @@ -0,0 +1,128 @@ +# +# Copyright 2023 The LLM-on-Ray Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest + +import transformers +from datasets import Dataset +from transformers import AutoTokenizer +from llm_on_ray.common.dataprocesser.general_processer import ( + ChatDataPreprocess, + SlimOrcaDataPreprocess, +) + + +class TestTokenizeFunction(unittest.TestCase): + def setUp(self): + self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf") + self.config = { + "gpt_base_model": True, + "max_length": 512, + "trust_remote_code": False, + "chat_template": "Below is an instruction that describes a task. 
Write a response that appropriately " + "completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception(" + "'System role not supported') }}{% endif %}{% for message in messages %}{% if (message[" + "'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles " + "must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] " + "== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == " + "'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### " + "End \n'}}", + } + self.processer = SlimOrcaDataPreprocess(self.config) + examples = { + "conversations": [ + {"from": "system", "value": "Test system", "weight": None}, + {"from": "human", "value": "Test human", "weight": 0}, + {"from": "gpt", "value": "Test gpt.", "weight": 1}, + ] + } + + self.ds = Dataset.from_dict(examples) + + def test_tokenize_function_with_gpt_model(self): + self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") + + # Verify the format of the result + expected_result = ( + "### System: Test system \n" "### User: Test human \n" "### Assistant: Test gpt." + ) + + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(self.ds)) + + self.assertEqual(expected_result, self.tokenizer.decode(result["input_ids"])) + + def test_tokenize_function_with_custom_chat_template(self): + # Verify the format of the result + expected_result = ( + "<|im_start|>system\n" + "Test system\n" + "<|im_end|><|im_start|>user\n" + "Test human\n" + "<|im_end|><|im_start|>assistant\n" + "Test gpt.\n" + "<|im_end|>" + ) + + print(expected_result) + # Set custom chat template + self.config["chat_template"] = ( + "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n'" + "+ message['content'] + '<|im_end|>'}}{% endfor %}" + ) + + self.config["gpt_base_model"] = False + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(self.ds)) + self.assertEqual(expected_result, self.tokenizer.decode(result["input_ids"])) + + def test_tokenize_function_with_default_chat_template(self): + # Verify the format of the result + expected_result = ( + "### System: Test system\n" "### User: Test human\n" "### Assistant: Test gpt.\n" + ) + self.config["gpt_base_model"] = False + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(self.ds)) + self.assertEqual(expected_result, self.tokenizer.decode(result["input_ids"])) + + def test_tokenize_function_with_tokenizer_chat_template(self): + self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf") + + chat_example = [ + { + "role": "system", + "content": "Test system\n", + }, + { + "role": "user", + "content": "Test human\n", + }, + { + "role": "assistant", + "content": "Test gpt.\n", + }, + ] + + # Verify the format of the result + expected_result = self.tokenizer.apply_chat_template( + chat_example, tokenize=True, max_length=self.config.get("max_length") + ) + + self.config["chat_template"] = None + self.config["gpt_base_model"] = False + result = self.processer.tokenize_func(self.tokenizer, self.processer.create_data(self.ds)) + self.assertEqual(expected_result, result["input_ids"]) + + +if __name__ == "__main__": + unittest.main() From f0d94d11f7835d3ffd31b536a0f94aeafe792244 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Tue, 14 May 2024 14:24:35 +0800 Subject: [PATCH 19/24] update --- llm_on_ray/finetune/finetune.yaml | 3 ++- llm_on_ray/finetune/finetune_config.py 
| 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llm_on_ray/finetune/finetune.yaml b/llm_on_ray/finetune/finetune.yaml index 627a88753..78a9e1c57 100644 --- a/llm_on_ray/finetune/finetune.yaml +++ b/llm_on_ray/finetune/finetune.yaml @@ -13,7 +13,8 @@ General: lora_dropout: 0.1 enable_gradient_checkpointing: false Dataset: - train_file: examples/data/sample_finetune_data_small.jsonl + type: "SlimOrca" + train_file: Open-Orca/SlimOrca group: true max_length: 512 block_size: 512 diff --git a/llm_on_ray/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py index bbbb916af..3046d96c3 100644 --- a/llm_on_ray/finetune/finetune_config.py +++ b/llm_on_ray/finetune/finetune_config.py @@ -80,6 +80,7 @@ class General(BaseModel): class Dataset(BaseModel): + type: str = "chat" train_file: str validation_file: Optional[str] validation_split_percentage: int From 6075c2c97dda09c38031e851797445bfd1c69763 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Tue, 14 May 2024 22:16:41 +0800 Subject: [PATCH 20/24] update Signed-off-by: minmingzhu --- .../common/dataprocesser/general_processer.py | 51 +++++++++---------- tests/finetune/test_chat_template.py | 28 +++++++--- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 31094aa8b..3cb32e778 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -132,15 +132,19 @@ def create_data(self, examples): ) else: new_messages = [ + { + "role": "system", + "content": INTRO_BLURB + "\n", + }, { "role": "user", "content": examples["instruction"] - + "\n\n" + + "\n" + INPUT_KEY + examples["context"] - + "\n\n", + + "\n", }, - {"role": "assistant", "content": examples["response"] + "\n\n"}, + {"role": "assistant", "content": examples["response"] + "\n"}, ] return new_messages @@ -162,7 +166,6 @@ def tokenize_func(self, tokenizer, message): message, tokenize=False, ) - print(new_tokenizer) return tokenizer( new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length") ) @@ -251,21 +254,9 @@ def prepare_dataloader(self, tokenizer, dataset): class SlimOrcaDataPreprocess(ChatDataPreprocess): - chat_template = ( - "{% for message in messages %}" - "{% if message['role'] == 'system' %}" - "{{ '### System: ' + message['content'] }}" - "{% elif message['role'] == 'user' %}" - "{{ '### User: ' + message['content'] }}" - "{% elif message['role'] == 'assistant' %}" - "{{ '### Assistant: ' + message['content'] }}" - "{% endif %}" - "{% endfor %}" - ) def __init__(self, config): super().__init__(config) - self.config["chat_template"] = self.chat_template self.default_system = "You are a helpful, respectful and honest assistant." 
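    # Note: a SlimOrca record (as constructed in tests/finetune/test_slimOrca_chat_template.py)
    # is assumed to look like
    #   {"conversations": [{"from": "system", "value": "..."},
    #                      {"from": "human",  "value": "..."},
    #                      {"from": "gpt",    "value": "..."}]}
    # create_data() below flattens it into a prompt string (gpt_base_model)
    # or into a list of {"role", "content"} chat messages.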
def create_data(self, data): @@ -286,22 +277,26 @@ def create_data(self, data): examples[conv[j]["from"]] = conv[j]["value"] examples[conv[j + 1]["from"]] = conv[j + 1]["value"] - new_messages = [ - {"role": "system", "content": examples["system"] + "\n"}, - { - "role": "user", - "content": examples["human"] + "\n", - }, - {"role": "assistant", "content": examples["gpt"] + "\n"}, - ] if self.config.get("gpt_base_model"): if examples["human"]: - return SLIMORCA_PROMPT_DICT["prompt_with_input"].format( - system=examples["system"], user=examples["human"], gpt=examples["gpt"] + return PROMPT_WITH_INPUT_FORMAT.format( + instruction=examples["system"], response=examples["gpt"], input=examples["human"] ) else: - return SLIMORCA_PROMPT_DICT["prompt_with_input"].format( - system=examples["human"], gpt=examples["gpt"] + return PROMPT_NO_INPUT_FORMAT.format( + instruction=examples["system"], response=examples["gpt"] ) else: + new_messages = [ + {"role": "system", "content": INTRO_BLURB + "\n"}, + { + "role": "user", + "content": examples["system"] + + "\n" + + INPUT_KEY + + examples["human"] + + "\n", + }, + {"role": "assistant", "content": examples["gpt"] + "\n"}, + ] return new_messages diff --git a/tests/finetune/test_chat_template.py b/tests/finetune/test_chat_template.py index 4d1217b6c..31d0eed12 100644 --- a/tests/finetune/test_chat_template.py +++ b/tests/finetune/test_chat_template.py @@ -27,14 +27,26 @@ def setUp(self): "gpt_base_model": True, "max_length": 512, "trust_remote_code": False, - "chat_template": "Below is an instruction that describes a task. Write a response that appropriately " - "completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception(" - "'System role not supported') }}{% endif %}{% for message in messages %}{% if (message[" - "'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles " - "must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] " - "== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == " - "'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### " - "End \n'}}", + "chat_template": "{% if messages[0]['role'] == 'system' %}" + "{% set loop_messages = messages[1:] %}" + "{% set system_message = messages[0]['content'] %}" + "{% else %}" + "{% set loop_messages = messages %}" + "{% set system_message = false %}" + "{% endif %}" + "{% for message in loop_messages %}" + "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}" + "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}" + "{% endif %}" + "{% if loop.index0 == 0 and system_message %}" + "{{ system_message }}" + "{% endif %}" + "{% if message['role'] == 'user' %}" + "{{ '### Instruction: ' + message['content'] + eos_token }}" + "{% elif message['role'] == 'assistant' %}" + "{{ '### Response:' + message['content'] + eos_token }}" + "{% endif %}{% endfor %}" + "{{'### End \n'}}", } self.processer = ChatDataPreprocess(self.config) From c17ce45af11f5dfd7d1c53ce578c4ab74f577106 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Fri, 17 May 2024 14:22:24 +0800 Subject: [PATCH 21/24] update Signed-off-by: minmingzhu --- llm_on_ray/common/__init__.py | 11 +++- .../common/dataprocesser/general_processer.py | 59 ++++++++++++------- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/llm_on_ray/common/__init__.py b/llm_on_ray/common/__init__.py index 0e8e821ad..e002976b6 100644 --- a/llm_on_ray/common/__init__.py 
+++ b/llm_on_ray/common/__init__.py @@ -18,4 +18,13 @@ from llm_on_ray.common.torch_config import TorchConfig from llm_on_ray.common.config import Config from llm_on_ray.common.init import init -from llm_on_ray.common import agentenv, dataset, initializer, model, optimizer, tokenizer, trainer +from llm_on_ray.common import ( + agentenv, + dataset, + initializer, + model, + optimizer, + tokenizer, + trainer, + dataprocesser, +) diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py index 3cb32e778..6f8a9e1e2 100644 --- a/llm_on_ray/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -27,7 +27,6 @@ INSTRUCTION_KEY = "### Instruction: " INPUT_KEY = "Input: " RESPONSE_KEY = "### Response: " -END_KEY = "### End" RESPONSE_KEY_NL = f"{RESPONSE_KEY}\n" PROMPT_NO_INPUT_FORMAT = """{intro} @@ -36,15 +35,12 @@ {instruction} {response_key} -{response} - -{end_key}""".format( +{response}""".format( intro=INTRO_BLURB, instruction_key=INSTRUCTION_KEY, instruction="{instruction}", response_key=RESPONSE_KEY, response="{response}", - end_key=END_KEY, ) PROMPT_WITH_INPUT_FORMAT = """{intro} @@ -56,9 +52,7 @@ {input} {response_key} -{response} - -{end_key}""".format( +{response}""".format( intro=INTRO_BLURB, instruction_key=INSTRUCTION_KEY, instruction="{instruction}", @@ -66,7 +60,6 @@ input="{input}", response_key=RESPONSE_KEY, response="{response}", - end_key=END_KEY, ) TEXT_COLUMN_NAME = "text" @@ -170,10 +163,7 @@ def tokenize_func(self, tokenizer, message): new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length") ) - def prepare(self, tokenizer, dataset): - per_device_train_batch_size = self.config.get("per_device_train_batch_size") - per_device_eval_batch_size = self.config.get("per_device_eval_batch_size") - + def tokenize_dataset(self, tokenizer, dataset): group = self.config.get("group") block_size = self.config.get("block_size") tokenizer.pad_token = tokenizer.eos_token @@ -254,7 +244,6 @@ def prepare_dataloader(self, tokenizer, dataset): class SlimOrcaDataPreprocess(ChatDataPreprocess): - def __init__(self, config): super().__init__(config) self.default_system = "You are a helpful, respectful and honest assistant." @@ -280,7 +269,9 @@ def create_data(self, data): if self.config.get("gpt_base_model"): if examples["human"]: return PROMPT_WITH_INPUT_FORMAT.format( - instruction=examples["system"], response=examples["gpt"], input=examples["human"] + instruction=examples["system"], + response=examples["gpt"], + input=examples["human"], ) else: return PROMPT_NO_INPUT_FORMAT.format( @@ -291,12 +282,40 @@ def create_data(self, data): {"role": "system", "content": INTRO_BLURB + "\n"}, { "role": "user", - "content": examples["system"] - + "\n" - + INPUT_KEY - + examples["human"] - + "\n", + "content": examples["system"] + "\n" + INPUT_KEY + examples["human"] + "\n", }, {"role": "assistant", "content": examples["gpt"] + "\n"}, ] return new_messages + + +class OpenOrcaDataPreprocess(ChatDataPreprocess): + def __init__(self, config): + super().__init__(config) + self.default_system = "You are an AI assistant. You will be given a task. You must generate a detailed and long answer." 
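+    # Note: Open-Orca style records are assumed to carry "system", "question"
+    # and "chosen" fields; create_data() below maps them to the same prompt
+    # string / chat-message structure used by the SlimOrca preprocessor.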
+ + def create_data(self, examples): + if self.config.get("gpt_base_model"): + if not examples["system"]: + examples["system"] = self.default_system + + if examples["question"]: + return PROMPT_WITH_INPUT_FORMAT.format( + instruction=examples["system"], + response=examples["chosen"], + input=examples["question"], + ) + else: + return PROMPT_NO_INPUT_FORMAT.format( + instruction=examples["system"], response=examples["chosen"] + ) + else: + new_messages = [ + {"role": "system", "content": INTRO_BLURB + "\n"}, + { + "role": "user", + "content": examples["system"] + "\n" + INPUT_KEY + examples["question"] + "\n", + }, + {"role": "assistant", "content": examples["chosen"] + "\n"}, + ] + return new_messages From 678d6e265c381c748a7a7db4a644df9e1cfcfbeb Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Fri, 17 May 2024 15:41:37 +0800 Subject: [PATCH 22/24] update --- llm_on_ray/finetune/finetune.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/llm_on_ray/finetune/finetune.py b/llm_on_ray/finetune/finetune.py index ae2e36c87..a4ecd9b07 100644 --- a/llm_on_ray/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -221,7 +221,17 @@ def train_func(config: Dict[str, Any]): } ) - dataprocesser = common.dataprocesser.DataProcesser.registory.get("GeneralProcesser")( + dataprocesser_type = config["Dataset"]["type"] + if dataprocesser_type == "chat": + preprocesser_name = "ChatDataPreprocess" + elif dataprocesser_type == "OpenOrca": + preprocesser_name = "OpenOrcaDataPreprocess" + elif dataprocesser_type == "SlimOrca": + preprocesser_name = "SlimOrcaDataPreprocess" + else: + raise ValueError(f"there is no {dataprocesser_type} dataprocesser.") + + dataprocesser = common.dataprocesser.DataProcesser.registory.get(preprocesser_name)( config={ "per_device_train_batch_size": config["Training"]["batch_size"], "per_device_eval_batch_size": config["Training"]["batch_size"], @@ -232,6 +242,9 @@ def train_func(config: Dict[str, Any]): "shuffle": config["Dataset"].get("shuffle", False), "name": tokenizer_name, "config": config["General"]["config"], + "gpt_base_model": config["General"].get("gpt_base_model", False), + "chat_template": config["General"]["chat_template"], + "default_chat_template": config["General"]["default_chat_template"], } ) tokenized_datasets = dataprocesser.tokenize_dataset(tokenizer, datasets) From 294161db8cec8324977856d1a362e68575791755 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Fri, 17 May 2024 15:45:51 +0800 Subject: [PATCH 23/24] update --- llm_on_ray/finetune/finetune_config.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/llm_on_ray/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py index 3046d96c3..0a25ad777 100644 --- a/llm_on_ray/finetune/finetune_config.py +++ b/llm_on_ray/finetune/finetune_config.py @@ -62,20 +62,29 @@ class General(BaseModel): enable_gradient_checkpointing: bool = False chat_template: Optional[str] = None default_chat_template: str = ( - "{{ bos_token }}" "{% if messages[0]['role'] == 'system' %}" - "{{ raise_exception('System role not supported') }}" + "{% set loop_messages = messages[1:] %}" + "{% set system_message = messages[0]['content'] %}" + "{% else %}" + "{% set loop_messages = messages %}" + "{% set system_message = false %}" "{% endif %}" - "{% for message in messages %}" + "{% for message in loop_messages %}" "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}" "{{ raise_exception('Conversation roles must alternate 
user/assistant/user/assistant/...') }}"
         "{% endif %}"
-        "{% if message['role'] == 'user' %}"
-        "{{ '### Instruction: ' + message['content'] + eos_token }}"
+        "{% if loop.index0 == 0 and system_message %}"
+        "{{ system_message }}"
+        "{% endif %}"
+        "{% if message['role'] == 'user' %}"
+        "{{ '### Instruction: ' + message['content'].strip() }}"
         "{% elif message['role'] == 'assistant' %}"
-        "{{ '### Response:' + message['content'] + eos_token }}"
-        "{% endif %}{% endfor %}"
-        "{{'### End \n'}}"
+        "{{ '### Response:' + message['content'].strip() }}"
+        "{% endif %}"
+        "{% endfor %}"
+        "{% if add_generation_prompt %}"
+        "{{ '### Response: '}}"
+        "{% endif %}"
     )


From c104a3e3ec4743d7169577a724872140d0c5dfdd Mon Sep 17 00:00:00 2001
From: minmingzhu
Date: Mon, 20 May 2024 16:30:45 +0800
Subject: [PATCH 24/24] update

---
 .../common/dataprocesser/general_processer.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/llm_on_ray/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py
index 6f8a9e1e2..c51329718 100644
--- a/llm_on_ray/common/dataprocesser/general_processer.py
+++ b/llm_on_ray/common/dataprocesser/general_processer.py
@@ -244,8 +244,21 @@ def prepare_dataloader(self, tokenizer, dataset):
 
 
 class SlimOrcaDataPreprocess(ChatDataPreprocess):
+    chat_template = (
+        "{% for message in messages %}"
+        "{% if message['role'] == 'system' %}"
+        "{{ '### System: ' + message['content'] }}"
+        "{% elif message['role'] == 'user' %}"
+        "{{ '### User: ' + message['content'] }}"
+        "{% elif message['role'] == 'assistant' %}"
+        "{{ '### Assistant: ' + message['content'] }}"
+        "{% endif %}"
+        "{% endfor %}"
+    )
+
     def __init__(self, config):
         super().__init__(config)
+        self.config["chat_template"] = self.chat_template
         self.default_system = "You are a helpful, respectful and honest assistant."
 
     def create_data(self, data):
@@ -268,18 +281,18 @@ def create_data(self, data):
 
         if self.config.get("gpt_base_model"):
             if examples["human"]:
-                return PROMPT_WITH_INPUT_FORMAT.format(
+                return SLIMORCA_PROMPT_DICT["prompt_with_input"].format(
                     instruction=examples["system"],
                     response=examples["gpt"],
                     input=examples["human"],
                 )
             else:
-                return PROMPT_NO_INPUT_FORMAT.format(
+                return SLIMORCA_PROMPT_DICT["prompt_without_input"].format(
                     instruction=examples["system"], response=examples["gpt"]
                 )
         else:
             new_messages = [
-                {"role": "system", "content": INTRO_BLURB + "\n"},
+                {"role": "system", "content": examples["system"] + "\n"},
                 {
                     "role": "user",
                     "content": examples["system"] + "\n" + INPUT_KEY + examples["human"] + "\n",
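
For reference, below is a minimal standalone sketch of how the SlimOrca chat template restored in PATCH 24 is applied at preprocessing time. The gpt-neox-20b checkpoint and the sample messages are illustrative assumptions only, not part of the patches; any Hugging Face tokenizer that supports apply_chat_template behaves the same way.

from transformers import AutoTokenizer

# Same Jinja template as SlimOrcaDataPreprocess.chat_template in PATCH 24.
SLIMORCA_CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{% if message['role'] == 'system' %}"
    "{{ '### System: ' + message['content'] }}"
    "{% elif message['role'] == 'user' %}"
    "{{ '### User: ' + message['content'] }}"
    "{% elif message['role'] == 'assistant' %}"
    "{{ '### Assistant: ' + message['content'] }}"
    "{% endif %}"
    "{% endfor %}"
)

# Illustrative checkpoint; swap in the tokenizer configured for the finetune job.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
tokenizer.chat_template = SLIMORCA_CHAT_TEMPLATE

messages = [
    {"role": "system", "content": "You are a helpful, respectful and honest assistant.\n"},
    {"role": "user", "content": "Test human\n"},
    {"role": "assistant", "content": "Test gpt.\n"},
]

# Render the conversation to a flat prompt string
# ("### System: ...### User: ...### Assistant: ..."),
# then tokenize it the same way ChatDataPreprocess.tokenize_func does.
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
encoding = tokenizer(prompt, add_special_tokens=False, max_length=512)
print(prompt)
print(encoding["input_ids"][:10])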