From c32e815634996b2645d32ae08c86634d97074c97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
 <quentin.gallouedec@huggingface.co>
Date: Wed, 22 Jan 2025 11:35:46 +0000
Subject: [PATCH] Fix typos in vLLM-related comments, docstring, and help text

---
 trl/trainer/grpo_trainer.py       | 2 +-
 trl/trainer/online_dpo_config.py  | 4 ++--
 trl/trainer/online_dpo_trainer.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
index 9649772443..9129f47e3c 100644
--- a/trl/trainer/grpo_trainer.py
+++ b/trl/trainer/grpo_trainer.py
@@ -184,7 +184,7 @@ def data_collator(features):  # No data collation is needed in GRPO
                 # top_p=1.0,
                 detokenize=False,  # to avoid vllm to decode (we don't need it)
             )
-            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation.
+            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation.
             # A larger cache size improves speed, so we would expect gpu_memory_utilization=1.
             # However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded
             # after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough
diff --git a/trl/trainer/online_dpo_config.py b/trl/trainer/online_dpo_config.py
index d01294c2e5..93a534f634 100644
--- a/trl/trainer/online_dpo_config.py
+++ b/trl/trainer/online_dpo_config.py
@@ -63,7 +63,7 @@ class OnlineDPOConfig(TrainingArguments):
         disable_dropout (`bool`, *optional*, defaults to `True`):
             Whether to disable dropout in the model and reference model.
         use_vllm (`bool`, *optional*, defaults to `False`):
-            Whether to use the vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`).
+            Whether to use vLLM for generating completions. Requires vLLM to be installed (`pip install vllm`).
     """
 
     learning_rate: float = field(
@@ -136,7 +136,7 @@ class OnlineDPOConfig(TrainingArguments):
     use_vllm: bool = field(
         default=False,
         metadata={
-            "help": "Whether to use the vLLM for generating completions. Requires vLLM to be installed "
+            "help": "Whether to use vLLM for generating completions. Requires vLLM to be installed "
             "(`pip install vllm`)."
         },
     )
diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py
index 7c7a6b3169..68183401fb 100644
--- a/trl/trainer/online_dpo_trainer.py
+++ b/trl/trainer/online_dpo_trainer.py
@@ -262,7 +262,7 @@ def __init__(
                 top_p=1.0,
                 detokenize=False,  # to avoid vllm to decode (we don't need it)
             )
-            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instanciation.
+            # vLLM dynamically adjusts the size of the key-value cache based on available GPU memory at instantiation.
             # A larger cache size improves speed, so we would expect gpu_memory_utilization=1.
             # However, at this stage, the optimizer's weights are not yet loaded onto the GPU; they will be loaded
             # after the first optimizer step and remain in GPU memory throughout training. So we must reserve enough