typo

huggingface · Jan 22, 2025 · c32e815
1 parent 59eafd0
commit c32e815
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 4 deletions.
diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
@@ -184,7 +184,7 @@ def data_collator(features):  # No data collation is needed in GRPO
                 # top_p=1.0,
                 detokenize=False,  # to avoid vllm to decode (we don't need it)
             )
-            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation.
+            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation.
             # A larger cache size improves speed, so we would expect gpu_memory_utilization=1.
             # However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded
             # after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough

diff --git a/trl/trainer/online_dpo_config.py b/trl/trainer/online_dpo_config.py
@@ -63,7 +63,7 @@ class OnlineDPOConfig(TrainingArguments):
         disable_dropout (`bool`, *optional*, defaults to `True`):
             Whether to disable dropout in the model and reference model.
         use_vllm (`bool`, *optional*, defaults to `False`):
-            Whether to use the vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`).
+            Whether to use vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`).
     """
 
     learning_rate: float = field(
@@ -136,7 +136,7 @@ class OnlineDPOConfig(TrainingArguments):
     use_vllm: bool = field(
         default=False,
         metadata={
-            "help": "Whether to use the vLLM for generating completions. Requires vLLM to be installed "
+            "help": "Whether to use vLLM for generating completions. Requires vLLM to be installed "
             "(`pip install vllm`)."
         },
     )

diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py
@@ -262,7 +262,7 @@ def __init__(
                 top_p=1.0,
                 detokenize=False,  # to avoid vllm to decode (we don't need it)
             )
-            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation.
+            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation.
             # A larger cache size improves speed, so we would expect gpu_memory_utilization=1.
             # However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded
             # after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough