Skip to content

Commit

Permalink
[New Features] support json file data (#13)
Browse files Browse the repository at this point in the history
* delete notes and modify argument

* delete ppo_config.json

* modify format

* support json data

* modify argument
  • Loading branch information
greycooker authored Feb 26, 2025
1 parent cd12f1a commit 55bc994
Show file tree
Hide file tree
Showing 11 changed files with 191 additions and 9 deletions.
1 change: 1 addition & 0 deletions llm/alignment/ppo/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from .alpaca import *
from .base import *
from .jsondata import *
from .preference import *
from .prompt_only import *
from .safe_rlhf import *
Expand Down
1 change: 0 additions & 1 deletion llm/alignment/ppo/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,6 @@ def load(name: str, /, *args: Any, **kwargs: Any) -> RawDataset:
"""Load a raw dataset by name."""
normalized_name = RawDataset.__ALIAS_NAME_MAPPING.get(name, name)
try:
# Here cls is SafeRLHFTrainDataset, and cls.NAME is 'PKU-SafeRLHF/train'
cls = RawDataset.__REGISTRY[normalized_name]
except KeyError as ex:
raise ValueError(
Expand Down
40 changes: 40 additions & 0 deletions llm/alignment/ppo/data/jsondata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datasets import load_dataset

from .base import RawDataset, RawSample

__all__ = ["JsonDataset"]


class JsonDataset(RawDataset):
    """Raw dataset read from a JSON / JSON Lines file.

    Every record must contain a ``src`` field, which becomes the sample's
    ``input``.  When a record also exposes a ``tgt`` field it is forwarded as
    the sample's ``answer``.
    """

    NAME: str = "Jsonfile"

    # The annotation is quoted on purpose: this module does not enable
    # ``from __future__ import annotations``, so a bare ``str | None`` would be
    # evaluated when the method is defined and raise TypeError on Python < 3.10.
    def __init__(self, path: "str | None" = None) -> None:
        """Load the file at ``path`` with the ``json`` dataset builder.

        Args:
            path: Location of the JSON/JSONL file; passed to ``load_dataset``
                as ``data_files``.

        Raises:
            ValueError: If ``path`` is ``None`` or empty.
        """
        # Fail early with an actionable message instead of handing
        # ``data_files=None`` to ``load_dataset``.
        if not path:
            raise ValueError(
                f"{type(self).__name__} requires the path of a JSON data file, but got {path!r}."
            )
        self.data = load_dataset("json", data_files=path, split="train")

    def __getitem__(self, index: int) -> RawSample:
        """Return the record at ``index`` converted to a ``RawSample``.

        Raises:
            KeyError: If the record has no ``src`` field.
        """
        record = self.data[index]
        # NOTE(review): a ``tgt`` key whose value is None is still forwarded as
        # ``answer=None`` -- confirm downstream consumers tolerate that.
        if "tgt" in record:
            return RawSample(input=record["src"], answer=record["tgt"])
        return RawSample(input=record["src"])

    def __len__(self) -> int:
        """Return the number of records in the loaded dataset."""
        return len(self.data)
4 changes: 1 addition & 3 deletions llm/alignment/ppo/data/safe_rlhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ class SafeRLHFDataset(RawDataset):
PATH: ClassVar[str]

def __init__(self, path: str | None = None) -> None:
# self.data = load_dataset(path or self.PATH, split=self.SPLIT)
self.data = load_dataset('json', data_files="5ppl_train.jsonl", split="train")

self.data = load_dataset(path or self.PATH, split=self.SPLIT)

def __getitem__(self, index: int) -> RawSample:
data = self.data[index]
Expand Down
3 changes: 0 additions & 3 deletions llm/alignment/ppo/run_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,6 @@ def main():
tokenizer.pad_token_id = tokenizer.eos_token_id

if training_args.should_load_dataset:
# [('PKU-SafeRLHF/train', {'proportion': 1.0})]
train_ds = PromptOnlyDataset(
data_args.parsed_train_datasets, tokenizer=actor_tokenizer, use_rm_server=training_args.use_rm_server
)
Expand Down Expand Up @@ -450,8 +449,6 @@ def main():
),
data_collator=train_ds.get_collator(),
)
# if token_audit:
# trainer.add_callback(token_callback)

# TODO(gongenlei) resume_from_checkpoint is not ready
checkpoint = None
Expand Down
3 changes: 1 addition & 2 deletions llm/config/llama/ppo_argument.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"actor_model_name_or_path": "PKU-Alignment/alpaca-7b-reproduced",
"reward_model_name_or_path": "PKU-Alignment/beaver-7b-v1.0-reward",
"output_dir": "checkpoints/llama-ppo",
"logging_dir": "log",
"max_length": 2048,
"use_fusemt": 1,
"use_flash_attention": 1,
Expand Down Expand Up @@ -76,4 +75,4 @@
"fused_linear":1,
"autotuner_benchmark": 0,
"skip_profile_timer": 1
}
}
79 changes: 79 additions & 0 deletions tests/fixtures/llm/ppo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
ppo:
base:
train_datasets: "Jsonfile::./tests/fixtures/llm/ppo_data/train.jsonl"
eval_datasets: "Jsonfile::./tests/fixtures/llm/ppo_data/dev.jsonl"
ptx_datasets: "Jsonfile::./tests/fixtures/llm/ppo_data/ptx.jsonl"
max_length: 2048
use_fusemt: 1
use_flash_attention: 1
max_dec_len: 1024
min_dec_len: 1
top_p: 0.8
temperature: 1.0
num_return_sequences: 1
repetition_penalty: 1.0
num_train_epochs: 1
max_steps: 5
update_iters: 1
per_device_prompt_batch_size: 1
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 2e-6
min_learning_rate: 2e-7
weight_decay: 0.01
lr_scheduler_type: "cosine"
warmup_ratio: 0.03
recompute: 1
recompute_granularity: "full"
recompute_use_reentrant: 1
critic_learning_rate: 2e-6
critic_min_learning_rate: 2e-7
critic_weight_decay: 0.01
critic_lr_scheduler_type: "cosine"
critic_warmup_ratio: 0.03
critic_recompute: 1
critic_recompute_granularity: "full"
normalize_reward: 1
normalize_advantage: 1
kl_coeff: 0.02
clip_range_ratio: 0.2
clip_range_score: 10.0
clip_range_value: 5.0
ptx_coeff: 16.0
logging_steps: 1
evaluation_strategy: "no"
per_device_eval_batch_size: 16
eval_steps: 10000
save_strategy: "steps"
save_steps: 400
save_total_limit: 5
bf16: 1
fp16: 0
fp16_opt_level: O2
do_train: 1
do_eval: 0
disable_tqdm: 1
sharding_parallel_degree: 1
sharding: stage1
tensor_parallel_degree: 8
tensor_parallel_output: 0
pipeline_parallel_degree: 1
pipeline_parallel_config: "disable_p2p_cache_shape"
sequence_parallel: 0
max_grad_norm: 1.0
adam_beta1: 0.9
adam_beta2: 0.95
dataloader_drop_last: 0
eval_mode: ""
offload_level: "freeze_model optimizer train_model"
release_grads: 1
seed: 23
use_fused_head_and_loss_fn: 0
autotuner_benchmark: 0
skip_profile_timer: 1
fused_linear: 1

default:
llama:
actor_model_name_or_path: __internal_testing__/tiny-random-llama
reward_model_name_or_path: __internal_testing__/tiny-random-llama
5 changes: 5 additions & 0 deletions tests/fixtures/llm/ppo_data/dev.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Aria, Jackson, Ethan, Owen, and Henry. Aria commented, \"Owen is a knight or Aria is a knight\". Jackson commented, \"If Ethan is a knight then Aria is a knight\". \"Aria is a knave if and only if Owen is a knave,\" Ethan declared. Owen was heard saying, \"If Henry is a knight then Henry is a knave\". \"Jackson is a knave if and only if Owen is a knave,\" Henry mentioned. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Aria is a knave\n(2) Jackson is a knave\n(3) Ethan is a knight\n(4) Owen is a knave\n(5) Henry is a knight"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Liam, Abigail, Oliver, Charlotte, and Joseph. According to Liam, \"Charlotte is a knight and Joseph is a knave\". Abigail said, \"Charlotte is a knave or Liam is a knight.\" According to Oliver, \"Charlotte is a knight\". Charlotte told you that Liam is a knave. In Joseph's words: \"Liam is not a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Liam is a knave\n(2) Abigail is a knave\n(3) Oliver is a knight\n(4) Charlotte is a knight\n(5) Joseph is a knight"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Ella, Victoria, Riley, Avery, and Sofia. Ella asserted: \"Ella is a knight or Victoria is a knave\". Victoria asserted: \"If Victoria is a knight then Riley is a knight\". Riley asserted: \"Ella is not a knight\". \"Sofia is a knight or Ella is a knave,\" Avery declared. Sofia expressed that If Victoria is a knave then Riley is a knave. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Ella is a knave\n(2) Victoria is a knight\n(3) Riley is a knight\n(4) Avery is a knight\n(5) Sofia is a knight"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Joseph, Michael, Elizabeth, Lucas, and Aria. \"Lucas is a knave and Elizabeth is a knave\" - Joseph. Michael said, \"Elizabeth is a knave and Elizabeth is a knight.\" In Elizabeth's words: \"Michael is not a knave\". Lucas remarked, \"Michael is not a knight\". Aria asserted: \"Aria is a knight and Joseph is a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Joseph is a knave\n(2) Michael is a knave\n(3) Elizabeth is a knave\n(4) Lucas is a knight\n(5) Aria is a knave"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Alexander, James, Ava, Logan, and Grace. Alexander told you that If Grace is a knight then Logan is a knight. James noted, \"Ava is a knave or Logan is a knight\". According to Ava, \"James is a knave if and only if Alexander is a knight\". \"Alexander is a knave if and only if James is a knight,\" Logan declared. \"Ava is a knave and Ava is a knight,\" Grace declared. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Alexander is a knight\n(2) James is a knight\n(3) Ava is a knave\n(4) Logan is a knave\n(5) Grace is a knave"}
5 changes: 5 additions & 0 deletions tests/fixtures/llm/ppo_data/ptx.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Aria, Jackson, Ethan, Owen, and Henry. Aria commented, \"Owen is a knight or Aria is a knight\". Jackson commented, \"If Ethan is a knight then Aria is a knight\". \"Aria is a knave if and only if Owen is a knave,\" Ethan declared. Owen was heard saying, \"If Henry is a knight then Henry is a knave\". \"Jackson is a knave if and only if Owen is a knave,\" Henry mentioned. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Aria is a knave\n(2) Jackson is a knave\n(3) Ethan is a knight\n(4) Owen is a knave\n(5) Henry is a knight"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Liam, Abigail, Oliver, Charlotte, and Joseph. According to Liam, \"Charlotte is a knight and Joseph is a knave\". Abigail said, \"Charlotte is a knave or Liam is a knight.\" According to Oliver, \"Charlotte is a knight\". Charlotte told you that Liam is a knave. In Joseph's words: \"Liam is not a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Liam is a knave\n(2) Abigail is a knave\n(3) Oliver is a knight\n(4) Charlotte is a knight\n(5) Joseph is a knight"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Ella, Victoria, Riley, Avery, and Sofia. Ella asserted: \"Ella is a knight or Victoria is a knave\". Victoria asserted: \"If Victoria is a knight then Riley is a knight\". Riley asserted: \"Ella is not a knight\". \"Sofia is a knight or Ella is a knave,\" Avery declared. Sofia expressed that If Victoria is a knave then Riley is a knave. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Ella is a knave\n(2) Victoria is a knight\n(3) Riley is a knight\n(4) Avery is a knight\n(5) Sofia is a knight"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Joseph, Michael, Elizabeth, Lucas, and Aria. \"Lucas is a knave and Elizabeth is a knave\" - Joseph. Michael said, \"Elizabeth is a knave and Elizabeth is a knight.\" In Elizabeth's words: \"Michael is not a knave\". Lucas remarked, \"Michael is not a knight\". Aria asserted: \"Aria is a knight and Joseph is a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Joseph is a knave\n(2) Michael is a knave\n(3) Elizabeth is a knave\n(4) Lucas is a knight\n(5) Aria is a knave"}
{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>. Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Alexander, James, Ava, Logan, and Grace. Alexander told you that If Grace is a knight then Logan is a knight. James noted, \"Ava is a knave or Logan is a knight\". According to Ava, \"James is a knave if and only if Alexander is a knight\". \"Alexander is a knave if and only if James is a knight,\" Logan declared. \"Ava is a knave and Ava is a knight,\" Grace declared. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Alexander is a knight\n(2) James is a knight\n(3) Ava is a knave\n(4) Logan is a knave\n(5) Grace is a knave"}
Loading

0 comments on commit 55bc994

Please sign in to comment.