[New Features] support json file data (#13)

* delete notes and modify argument
* delete ppo_config.json
* modify format
* support json data
* modify argument
DrownFish19 · Feb 26, 2025 · 55bc994 · 55bc994
1 parent cd12f1a
commit 55bc994
Show file tree

Hide file tree

Showing 11 changed files with 191 additions and 9 deletions.
diff --git a/llm/alignment/ppo/data/__init__.py b/llm/alignment/ppo/data/__init__.py
@@ -16,6 +16,7 @@
 
 from .alpaca import *
 from .base import *
+from .jsondata import *
 from .preference import *
 from .prompt_only import *
 from .safe_rlhf import *

diff --git a/llm/alignment/ppo/data/base.py b/llm/alignment/ppo/data/base.py
@@ -263,7 +263,6 @@ def load(name: str, /, *args: Any, **kwargs: Any) -> RawDataset:
         """Load a raw dataset by name."""
         normalized_name = RawDataset.__ALIAS_NAME_MAPPING.get(name, name)
         try:
-            # 这里的cls就是SafeRLHFTrainDataset,cls.NAME:'PKU-SafeRLHF/train'
             cls = RawDataset.__REGISTRY[normalized_name]
         except KeyError as ex:
             raise ValueError(

diff --git a/llm/alignment/ppo/data/jsondata.py b/llm/alignment/ppo/data/jsondata.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datasets import load_dataset
+
+from .base import RawDataset, RawSample
+
+__all__ = ["JsonDataset"]
+
+
+# Raw dataset backed by a JSON / JSON-lines file, loaded through the
+# `datasets` "json" builder. Each record is read as a mapping that provides
+# the prompt under "src" and, optionally, a reference answer under "tgt".
+class JsonDataset(RawDataset):
+    # Dataset name; the test config refers to this dataset as "Jsonfile::<path>".
+    # Presumably this is the key used by the RawDataset registry -- confirm in base.py.
+    NAME: str = "Jsonfile"
+
+    # Load the "train" split of the JSON file(s) given by `path`.
+    # NOTE(review): the default `path=None` is forwarded unchanged as
+    # `data_files=None`; confirm callers always supply a path.
+    # NOTE(review): `str | None` is evaluated when the function is defined and
+    # this file has no `from __future__ import annotations`, so it needs
+    # Python >= 3.10 -- confirm the supported interpreter range.
+    def __init__(self, path: str | None = None) -> None:
+        self.data = load_dataset("json", data_files=path, split="train")
+
+    # Return record `index` as a RawSample: `input` comes from "src", and
+    # `answer` is filled from "tgt" only when the record has that key.
+    # A record without "src" is not handled here.
+    def __getitem__(self, index: int) -> RawSample:
+        data = self.data[index]
+        if "tgt" in data:
+            # Record carries a reference answer alongside the prompt.
+            rawdata = RawSample(
+                input=data["src"],
+                answer=data["tgt"],
+            )
+        else:
+            # Prompt-only record: `answer` is left at RawSample's default.
+            rawdata = RawSample(input=data["src"])
+        return rawdata
+
+    # Number of records in the loaded split.
+    def __len__(self) -> int:
+        return len(self.data)  # dataset size
diff --git a/llm/alignment/ppo/data/safe_rlhf.py b/llm/alignment/ppo/data/safe_rlhf.py
@@ -35,9 +35,7 @@ class SafeRLHFDataset(RawDataset):
     PATH: ClassVar[str]
 
     def __init__(self, path: str | None = None) -> None:
-        # self.data = load_dataset(path or self.PATH, split=self.SPLIT)
-        self.data = load_dataset('json', data_files="5ppl_train.jsonl", split="train")
-
+        self.data = load_dataset(path or self.PATH, split=self.SPLIT)
 
     def __getitem__(self, index: int) -> RawSample:
         data = self.data[index]

diff --git a/llm/alignment/ppo/run_ppo.py b/llm/alignment/ppo/run_ppo.py
@@ -390,7 +390,6 @@ def main():
             tokenizer.pad_token_id = tokenizer.eos_token_id
 
     if training_args.should_load_dataset:
-        # [('PKU-SafeRLHF/train', {'proportion': 1.0})]
         train_ds = PromptOnlyDataset(
             data_args.parsed_train_datasets, tokenizer=actor_tokenizer, use_rm_server=training_args.use_rm_server
         )
@@ -450,8 +449,6 @@ def main():
         ),
         data_collator=train_ds.get_collator(),
     )
-    # if token_audit:
-    #     trainer.add_callback(token_callback)
 
     # TODO(gongenlei) resume_from_checkpoint is not ready
     checkpoint = None

diff --git a/llm/config/llama/ppo_argument.json b/llm/config/llama/ppo_argument.json
@@ -5,7 +5,6 @@
     "actor_model_name_or_path": "PKU-Alignment/alpaca-7b-reproduced",
     "reward_model_name_or_path": "PKU-Alignment/beaver-7b-v1.0-reward",
     "output_dir": "checkpoints/llama-ppo",
-    "logging_dir": "log",
     "max_length": 2048,
     "use_fusemt": 1,
     "use_flash_attention": 1,
@@ -76,4 +75,4 @@
     "fused_linear":1,
     "autotuner_benchmark": 0,
     "skip_profile_timer": 1
-}
+}
diff --git a/tests/fixtures/llm/ppo.yaml b/tests/fixtures/llm/ppo.yaml
@@ -0,0 +1,79 @@
+ppo:
+  base:
+    train_datasets: "Jsonfile::./tests/fixtures/llm/ppo_data/train.jsonl"
+    eval_datasets: "Jsonfile::./tests/fixtures/llm/ppo_data/dev.jsonl"
+    ptx_datasets: "Jsonfile::./tests/fixtures/llm/ppo_data/ptx.jsonl"
+    max_length: 2048
+    use_fusemt: 1
+    use_flash_attention: 1
+    max_dec_len: 1024
+    min_dec_len: 1
+    top_p: 0.8
+    temperature: 1.0
+    num_return_sequences: 1
+    repetition_penalty: 1.0
+    num_train_epochs: 1
+    max_steps: 5
+    update_iters: 1
+    per_device_prompt_batch_size: 1
+    per_device_train_batch_size: 1
+    gradient_accumulation_steps: 1
+    learning_rate: 2e-6
+    min_learning_rate: 2e-7
+    weight_decay: 0.01
+    lr_scheduler_type: "cosine"
+    warmup_ratio: 0.03
+    recompute: 1
+    recompute_granularity: "full"
+    recompute_use_reentrant: 1
+    critic_learning_rate: 2e-6
+    critic_min_learning_rate: 2e-7
+    critic_weight_decay: 0.01
+    critic_lr_scheduler_type: "cosine"
+    critic_warmup_ratio: 0.03
+    critic_recompute: 1
+    critic_recompute_granularity: "full"
+    normalize_reward: 1
+    normalize_advantage: 1
+    kl_coeff: 0.02
+    clip_range_ratio: 0.2
+    clip_range_score: 10.0
+    clip_range_value: 5.0
+    ptx_coeff: 16.0
+    logging_steps: 1
+    evaluation_strategy: "no"
+    per_device_eval_batch_size: 16
+    eval_steps: 10000
+    save_strategy: "steps"
+    save_steps: 400
+    save_total_limit: 5
+    bf16: 1
+    fp16: 0
+    fp16_opt_level: O2
+    do_train: 1
+    do_eval: 0
+    disable_tqdm: 1
+    sharding_parallel_degree: 1
+    sharding: stage1
+    tensor_parallel_degree: 8
+    tensor_parallel_output: 0
+    pipeline_parallel_degree: 1
+    pipeline_parallel_config: "disable_p2p_cache_shape"
+    sequence_parallel: 0
+    max_grad_norm: 1.0
+    adam_beta1: 0.9
+    adam_beta2: 0.95
+    dataloader_drop_last: 0
+    eval_mode: ""
+    offload_level: "freeze_model optimizer train_model"
+    release_grads: 1
+    seed: 23
+    use_fused_head_and_loss_fn: 0
+    autotuner_benchmark: 0
+    skip_profile_timer: 1
+    fused_linear: 1
+
+  default:
+    llama:
+      actor_model_name_or_path: __internal_testing__/tiny-random-llama
+      reward_model_name_or_path: __internal_testing__/tiny-random-llama
diff --git a/tests/fixtures/llm/ppo_data/dev.jsonl b/tests/fixtures/llm/ppo_data/dev.jsonl
@@ -0,0 +1,5 @@
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Aria, Jackson, Ethan, Owen, and Henry. Aria commented, \"Owen is a knight or Aria is a knight\". Jackson commented, \"If Ethan is a knight then Aria is a knight\". \"Aria is a knave if and only if Owen is a knave,\" Ethan declared. Owen was heard saying, \"If Henry is a knight then Henry is a knave\". \"Jackson is a knave if and only if Owen is a knave,\" Henry mentioned. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Aria is a knave\n(2) Jackson is a knave\n(3) Ethan is a knight\n(4) Owen is a knave\n(5) Henry is a knight"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Liam, Abigail, Oliver, Charlotte, and Joseph. According to Liam, \"Charlotte is a knight and Joseph is a knave\". Abigail said, \"Charlotte is a knave or Liam is a knight.\" According to Oliver, \"Charlotte is a knight\". Charlotte told you that Liam is a knave. In Joseph's words: \"Liam is not a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Liam is a knave\n(2) Abigail is a knave\n(3) Oliver is a knight\n(4) Charlotte is a knight\n(5) Joseph is a knight"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Ella, Victoria, Riley, Avery, and Sofia. Ella asserted: \"Ella is a knight or Victoria is a knave\". Victoria asserted: \"If Victoria is a knight then Riley is a knight\". Riley asserted: \"Ella is not a knight\". \"Sofia is a knight or Ella is a knave,\" Avery declared. Sofia expressed that If Victoria is a knave then Riley is a knave. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Ella is a knave\n(2) Victoria is a knight\n(3) Riley is a knight\n(4) Avery is a knight\n(5) Sofia is a knight"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Joseph, Michael, Elizabeth, Lucas, and Aria. \"Lucas is a knave and Elizabeth is a knave\" - Joseph. Michael said, \"Elizabeth is a knave and Elizabeth is a knight.\" In Elizabeth's words: \"Michael is not a knave\". Lucas remarked, \"Michael is not a knight\". Aria asserted: \"Aria is a knight and Joseph is a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Joseph is a knave\n(2) Michael is a knave\n(3) Elizabeth is a knave\n(4) Lucas is a knight\n(5) Aria is a knave"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Alexander, James, Ava, Logan, and Grace. Alexander told you that If Grace is a knight then Logan is a knight. James noted, \"Ava is a knave or Logan is a knight\". According to Ava, \"James is a knave if and only if Alexander is a knight\". \"Alexander is a knave if and only if James is a knight,\" Logan declared. \"Ava is a knave and Ava is a knight,\" Grace declared. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Alexander is a knight\n(2) James is a knight\n(3) Ava is a knave\n(4) Logan is a knave\n(5) Grace is a knave"}
diff --git a/tests/fixtures/llm/ppo_data/ptx.jsonl b/tests/fixtures/llm/ppo_data/ptx.jsonl
@@ -0,0 +1,5 @@
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Aria, Jackson, Ethan, Owen, and Henry. Aria commented, \"Owen is a knight or Aria is a knight\". Jackson commented, \"If Ethan is a knight then Aria is a knight\". \"Aria is a knave if and only if Owen is a knave,\" Ethan declared. Owen was heard saying, \"If Henry is a knight then Henry is a knave\". \"Jackson is a knave if and only if Owen is a knave,\" Henry mentioned. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Aria is a knave\n(2) Jackson is a knave\n(3) Ethan is a knight\n(4) Owen is a knave\n(5) Henry is a knight"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Liam, Abigail, Oliver, Charlotte, and Joseph. According to Liam, \"Charlotte is a knight and Joseph is a knave\". Abigail said, \"Charlotte is a knave or Liam is a knight.\" According to Oliver, \"Charlotte is a knight\". Charlotte told you that Liam is a knave. In Joseph's words: \"Liam is not a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Liam is a knave\n(2) Abigail is a knave\n(3) Oliver is a knight\n(4) Charlotte is a knight\n(5) Joseph is a knight"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Ella, Victoria, Riley, Avery, and Sofia. Ella asserted: \"Ella is a knight or Victoria is a knave\". Victoria asserted: \"If Victoria is a knight then Riley is a knight\". Riley asserted: \"Ella is not a knight\". \"Sofia is a knight or Ella is a knave,\" Avery declared. Sofia expressed that If Victoria is a knave then Riley is a knave. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Ella is a knave\n(2) Victoria is a knight\n(3) Riley is a knight\n(4) Avery is a knight\n(5) Sofia is a knight"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Joseph, Michael, Elizabeth, Lucas, and Aria. \"Lucas is a knave and Elizabeth is a knave\" - Joseph. Michael said, \"Elizabeth is a knave and Elizabeth is a knight.\" In Elizabeth's words: \"Michael is not a knave\". Lucas remarked, \"Michael is not a knight\". Aria asserted: \"Aria is a knight and Joseph is a knight\". So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Joseph is a knave\n(2) Michael is a knave\n(3) Elizabeth is a knave\n(4) Lucas is a knight\n(5) Aria is a knave"}
+{"src": "<|im_start|>system\nYou are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.  Now the user asks you to solve a logical reasoning problem. After thinking, when you finally reach a conclusion, clearly state the identity of each character within <answer> </answer> tags. i.e., <answer> (1) Zoey is a knight\n(2) ... </answer>.\n<|im_end|>\n<|im_start|>user\nA very special island is inhabited only by knights and knaves. Knights always tell the truth, and knaves always lie. You meet 5 inhabitants: Alexander, James, Ava, Logan, and Grace. Alexander told you that If Grace is a knight then Logan is a knight. James noted, \"Ava is a knave or Logan is a knight\". According to Ava, \"James is a knave if and only if Alexander is a knight\". \"Alexander is a knave if and only if James is a knight,\" Logan declared. \"Ava is a knave and Ava is a knight,\" Grace declared. So who is a knight and who is a knave?\n<|im_end|>\n<|im_start|>assistant\n<think>", "tgt": "(1) Alexander is a knight\n(2) James is a knight\n(3) Ava is a knave\n(4) Logan is a knave\n(5) Grace is a knave"}