Skip to content

Commit

Permalink
Merge branch 'vllm-onlinedpo' of https://github.com/huggingface/trl into vllm-onlinedpo
Browse files (browse the repository at this point in the history)
  • Loading branch information
qgallouedec committed Jan 15, 2025
2 parents e2fc58f + ac5e31f commit b7112fb
Show file tree
Hide file tree
Showing 17 changed files with 72 additions and 71 deletions.
8 changes: 4 additions & 4 deletions trl/trainer/alignprop_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
10 changes: 5 additions & 5 deletions trl/trainer/bco_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1493,11 +1493,11 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
tags (`str`, `list[str]` or None, *optional*, defaults to `None`):
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/cpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,10 +1026,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/ddpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,10 +603,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/dpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1525,10 +1525,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/gkd_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/iterative_sft_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/kto_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1501,10 +1501,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
10 changes: 5 additions & 5 deletions trl/trainer/nash_md_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand All @@ -492,7 +492,7 @@ def create_model_card(

citation = textwrap.dedent("""\
@inproceedings{munos2024nash,
title = {Nash Learning from Human Feedback},
title = {{Nash Learning from Human Feedback}},
author = {R{\'{e}}mi Munos and Michal Valko and Daniele Calandriello and Mohammad Gheshlaghi Azar and Mark Rowland and Zhaohan Daniel Guo and Yunhao Tang and Matthieu Geist and Thomas Mesnard and C{\\^{o}}me Fiegel and Andrea Michi and Marco Selvi and Sertan Girgin and Nikola Momchev and Olivier Bachem and Daniel J. Mankowitz and Doina Precup and Bilal Piot},
year = 2024,
booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/online_dpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,10 +778,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/orpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,10 +1055,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/ppo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,10 +747,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
11 changes: 6 additions & 5 deletions trl/trainer/prm_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,11 +288,12 @@ def create_model_card(
):
"""
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand All @@ -313,7 +314,7 @@ def create_model_card(

citation = textwrap.dedent("""\
@article{uesato2022solving,
title = {Solving Math Word Problems With Process- and Outcome-Based Feedback},
title = {{Solving Math Word Problems With Process- and Outcome-Based Feedback}},
author = {Uesato, Jonathan and Kushman, Nate and Kumar, Ramana and Song, Francis and Siegel, Noah and Wang, Lisa and Creswell, Antonia and Irving, Geoffrey and Higgins, Irina},
year = 2022,
journal = {arXiv preprint arXiv:2211.14275}
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/reward_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/rloo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,10 +613,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/sft_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/xpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,10 +525,10 @@ def create_model_card(
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str`, *optional*, defaults to `None`):
The name of the model.
dataset_name (`str`, *optional*, defaults to `None`):
The name of the dataset used for training.
model_name (`str` or `None`, *optional*, defaults to `None`):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
Tags to be associated with the model card.
"""
Expand Down

0 comments on commit b7112fb

Please sign in to comment.