From c32e815634996b2645d32ae08c86634d97074c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 22 Jan 2025 11:35:46 +0000 Subject: [PATCH] typo --- trl/trainer/grpo_trainer.py | 2 +- trl/trainer/online_dpo_config.py | 4 ++-- trl/trainer/online_dpo_trainer.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py index 9649772443..9129f47e3c 100644 --- a/trl/trainer/grpo_trainer.py +++ b/trl/trainer/grpo_trainer.py @@ -184,7 +184,7 @@ def data_collator(features): # No data collation is needed in GRPO # top_p=1.0, detokenize=False, # to avoid vllm to decode (we don't need it) ) - # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation. + # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation. # A larger cache size improves speed, so we would expect gpu_memory_utilization=1. # However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded # after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough diff --git a/trl/trainer/online_dpo_config.py b/trl/trainer/online_dpo_config.py index d01294c2e5..93a534f634 100644 --- a/trl/trainer/online_dpo_config.py +++ b/trl/trainer/online_dpo_config.py @@ -63,7 +63,7 @@ class OnlineDPOConfig(TrainingArguments): disable_dropout (`bool`, *optional*, defaults to `True`): Whether to disable dropout in the model and reference model. use_vllm (`bool`, *optional*, defaults to `False`): - Whether to use the vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`). + Whether to use vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`). """ learning_rate: float = field( @@ -136,7 +136,7 @@ class OnlineDPOConfig(TrainingArguments): use_vllm: bool = field( default=False, metadata={ - "help": "Whether to use the vLLM for generating completions. Requires vLLM to be installed " + "help": "Whether to use vLLM for generating completions. Requires vLLM to be installed " "(`pip install vllm`)." }, ) diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py index 7c7a6b3169..68183401fb 100644 --- a/trl/trainer/online_dpo_trainer.py +++ b/trl/trainer/online_dpo_trainer.py @@ -262,7 +262,7 @@ def __init__( top_p=1.0, detokenize=False, # to avoid vllm to decode (we don't need it) ) - # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation. + # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation. # A larger cache size improves speed, so we would expect gpu_memory_utilization=1. # However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded # after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough