Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
Signed-off-by: minmingzhu <[email protected]>
  • Loading branch information
minmingzhu committed May 16, 2024
1 parent f0d94d1 commit 6075c2c
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 36 deletions.
51 changes: 23 additions & 28 deletions llm_on_ray/common/dataprocesser/general_processer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -132,15 +132,19 @@ def create_data(self, examples):
)
else:
new_messages = [
{
"role": "system",
"content": INTRO_BLURB + "\n",
},
{
"role": "user",
"content": examples["instruction"]
+ "\n\n"
+ "\n"
+ INPUT_KEY
+ examples["context"]
+ "\n\n",
+ "\n",
},
{"role": "assistant", "content": examples["response"] + "\n\n"},
{"role": "assistant", "content": examples["response"] + "\n"},
]

return new_messages
Expand All @@ -162,7 +166,6 @@ def tokenize_func(self, tokenizer, message):
message,
tokenize=False,
)
print(new_tokenizer)
return tokenizer(
new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length")
)
Expand Down Expand Up @@ -251,21 +254,9 @@ def prepare_dataloader(self, tokenizer, dataset):


class SlimOrcaDataPreprocess(ChatDataPreprocess):
chat_template = (
"{% for message in messages %}"
"{% if message['role'] == 'system' %}"
"{{ '### System: ' + message['content'] }}"
"{% elif message['role'] == 'user' %}"
"{{ '### User: ' + message['content'] }}"
"{% elif message['role'] == 'assistant' %}"
"{{ '### Assistant: ' + message['content'] }}"
"{% endif %}"
"{% endfor %}"
)

def __init__(self, config):
super().__init__(config)
self.config["chat_template"] = self.chat_template
self.default_system = "You are a helpful, respectful and honest assistant."

def create_data(self, data):
Expand All @@ -286,22 +277,26 @@ def create_data(self, data):
examples[conv[j]["from"]] = conv[j]["value"]
examples[conv[j + 1]["from"]] = conv[j + 1]["value"]

new_messages = [
{"role": "system", "content": examples["system"] + "\n"},
{
"role": "user",
"content": examples["human"] + "\n",
},
{"role": "assistant", "content": examples["gpt"] + "\n"},
]
if self.config.get("gpt_base_model"):
if examples["human"]:
return SLIMORCA_PROMPT_DICT["prompt_with_input"].format(
system=examples["system"], user=examples["human"], gpt=examples["gpt"]
return PROMPT_WITH_INPUT_FORMAT.format(
instruction=examples["system"], response=examples["gpt"], input=examples["human"]
)
else:
return SLIMORCA_PROMPT_DICT["prompt_with_input"].format(
system=examples["human"], gpt=examples["gpt"]
return PROMPT_NO_INPUT_FORMAT.format(
instruction=examples["system"], response=examples["gpt"]
)
else:
new_messages = [
{"role": "system", "content": INTRO_BLURB + "\n"},
{
"role": "user",
"content": examples["system"]
+ "\n"
+ INPUT_KEY
+ examples["human"]
+ "\n",
},
{"role": "assistant", "content": examples["gpt"] + "\n"},
]
return new_messages
28 changes: 20 additions & 8 deletions tests/finetune/test_chat_template.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,14 +27,26 @@ def setUp(self):
"gpt_base_model": True,
"max_length": 512,
"trust_remote_code": False,
"chat_template": "Below is an instruction that describes a task. Write a response that appropriately "
"completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception("
"'System role not supported') }}{% endif %}{% for message in messages %}{% if (message["
"'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles "
"must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] "
"== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == "
"'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### "
"End \n'}}",
"chat_template": "{% if messages[0]['role'] == 'system' %}"
"{% set loop_messages = messages[1:] %}"
"{% set system_message = messages[0]['content'] %}"
"{% else %}"
"{% set loop_messages = messages %}"
"{% set system_message = false %}"
"{% endif %}"
"{% for message in loop_messages %}"
"{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
"{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
"{% endif %}"
"{% if loop.index0 == 0 and system_message %}"
"{{ system_message }}"
"{% endif %}"
"{% if message['role'] == 'user' %}"
"{{ '### Instruction: ' + message['content'] + eos_token }}"
"{% elif message['role'] == 'assistant' %}"
"{{ '### Response:' + message['content'] + eos_token }}"
"{% endif %}{% endfor %}"
"{{'### End \n'}}",
}
self.processer = ChatDataPreprocess(self.config)

Expand Down

0 comments on commit 6075c2c

Please sign in to comment.