Skip to content

Commit

Permalink
🔍 Update GRPO config documentation for beta parameter stability (#2992)
Browse files — browse the repository at this point in the history
  • Loading branch information
nopepper authored Feb 28, 2025
1 parent b882f57 commit 7bc9858
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions trl/trainer/grpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class GRPOConfig(TrainingArguments):
[`~transformers.TrainingArguments`].
beta (`float`, *optional*, defaults to `0.04`):
KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving training
speed.
speed, but may be numerically unstable for long training runs.
num_iterations (`int`, *optional*, defaults to `1`):
Number of iterations per batch (denoted as μ in the algorithm).
epsilon (`float`, *optional*, defaults to `0.2`):
Expand Down Expand Up @@ -235,7 +235,7 @@ class GRPOConfig(TrainingArguments):
default=0.04,
metadata={
"help": "KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving "
"training speed."
"training speed, but may be numerically unstable for long training runs."
},
)
num_iterations: int = field(
Expand Down

0 comments on commit 7bc9858

Please sign in to comment.