From cbeb44e0cd994ff6b4870224b8b6ac4f5c3ed9d1 Mon Sep 17 00:00:00 2001
From: Dan Saattrup Nielsen
Date: Wed, 22 Nov 2023 14:17:45 +0100
Subject: [PATCH] feat: Automatically set hyperparameters related to multi-GPU

---
 config/config.yaml            | 5 ++++-
 config/model/wav2vec2.yaml    | 2 +-
 src/scripts/finetune_model.py | 8 ++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index a59047f6..ec8017b8 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -18,7 +18,10 @@ seed: 4242
 characters_to_keep: 'abcdefghijklmnopqrstuvwxyzæøå0123456789éü '
 max_seconds_per_example: 10
 dataloader_num_workers: 8
-padding: longest # longest/max_length/do_not_pad
+
+# Can be `longest`, `max_length` or `do_not_pad`
+# NOTE: This is automatically set to `max_length` in a multi-GPU setting
+padding: longest
 
 # This is a list of the sampling probability of each dataset, where null means that
 # each dataset will be sampled equally often
diff --git a/config/model/wav2vec2.yaml b/config/model/wav2vec2.yaml
index 6f315c8f..768954ca 100644
--- a/config/model/wav2vec2.yaml
+++ b/config/model/wav2vec2.yaml
@@ -18,7 +18,7 @@ mask_time_prob: 0.5
 mask_time_length: 10
 mask_feature_prob: 0.5
 mask_feature_length: 64
-layerdrop: 0.0 # NOTE: This parameter cannot be used in a multi-gpu setting!
+layerdrop: 0.1 # This will automatically be set to 0 in a multi-GPU setting
 ctc_loss_reduction: mean
 
 # Decoder hyperparameters
diff --git a/src/scripts/finetune_model.py b/src/scripts/finetune_model.py
index b1e9b197..95e25d66 100644
--- a/src/scripts/finetune_model.py
+++ b/src/scripts/finetune_model.py
@@ -6,6 +6,7 @@
 
+import os
 import hydra
 from omegaconf import DictConfig
 
 from coral_models.finetune import finetune
 
@@ -18,6 +19,13 @@ def main(cfg: DictConfig) -> None:
 
         cfg (DictConfig): The Hydra configuration object.
     """
+    # In a multi-GPU setting we need to force certain hyperparameters to
+    # values that are compatible with distributed training
+    if os.getenv("WORLD_SIZE") is not None:
+        if "layerdrop" in cfg.model:
+            cfg.model.layerdrop = 0.0
+        cfg.padding = "max_length"
+
     finetune(cfg)
 
 
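
For reference, the multi-GPU overrides above key off the WORLD_SIZE environment
variable, which distributed launchers such as torchrun export to every worker
process. A minimal sketch of a launch that would trigger them (the GPU count
here is illustrative, not part of the patch):

    torchrun --nproc_per_node=2 src/scripts/finetune_model.py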