Skip to content

Commit

Permalink
typo
Browse files Browse the repository at this point in the history
  • Loading branch information
qgallouedec committed Jan 22, 2025
1 parent 59eafd0 commit c32e815
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion trl/trainer/grpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def data_collator(features): # No data collation is needed in GRPO
# top_p=1.0,
detokenize=False, # to prevent vLLM from decoding (we don't need it)
)
# vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation.
# vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation.
# A larger cache size improves speed, so we would expect gpu_memory_utilization=1.
# However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded
# after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough
Expand Down
4 changes: 2 additions & 2 deletions trl/trainer/online_dpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class OnlineDPOConfig(TrainingArguments):
disable_dropout (`bool`, *optional*, defaults to `True`):
Whether to disable dropout in the model and reference model.
use_vllm (`bool`, *optional*, defaults to `False`):
Whether to use the vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`).
Whether to use vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`).
"""

learning_rate: float = field(
Expand Down Expand Up @@ -136,7 +136,7 @@ class OnlineDPOConfig(TrainingArguments):
use_vllm: bool = field(
default=False,
metadata={
"help": "Whether to use the vLLM for generating completions. Requires vLLM to be installed "
"help": "Whether to use vLLM for generating completions. Requires vLLM to be installed "
"(`pip install vllm`)."
},
)
Expand Down
2 changes: 1 addition & 1 deletion trl/trainer/online_dpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def __init__(
top_p=1.0,
detokenize=False, # to prevent vLLM from decoding (we don't need it)
)
# vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation.
# vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation.
# A larger cache size improves speed, so we would expect gpu_memory_utilization=1.
# However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded
# after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough
Expand Down

0 comments on commit c32e815

Please sign in to comment.