From f74a1a170874f5d87a0f9ae3a28c18b89bc80a8e Mon Sep 17 00:00:00 2001
From: Francois Ledoyen
Date: Wed, 12 Feb 2025 19:20:08 +0100
Subject: [PATCH] Update get_input_samples to include task_ids when n_tasks is
 given

---
 tests/test_methods/base.py                    | 26 ++++++++++++-------
 tests/test_methods/method_test_impl/base.py   | 10 +++----
 tests/test_methods/test_on_clip/test_model.py |  3 ++-
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py
index f5e53fedd..4b7275e9d 100644
--- a/tests/test_methods/base.py
+++ b/tests/test_methods/base.py
@@ -22,10 +22,6 @@ class AbstractAdapterTestBase:
     do_run_train_tests = True
     num_labels = 2
 
-    def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs):
-        """Creates a dummy batch of samples in the format required for the model."""
-        raise NotImplementedError("get_input_samples() must be implemented in the subclass.")
-
     def add_head(self, model, name, **kwargs):
         """Adds a dummy head to the model."""
         raise NotImplementedError("add_head() must be implemented in the subclass.")
@@ -42,6 +38,12 @@ def attach_labels(self, inputs):
         """Attaches labels to the input samples."""
         raise NotImplementedError("attach_labels() with respective label shape must be implemented in the subclass.")
 
+    def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs):
+        in_data = {}
+        if "n_tasks" in kwargs:
+            in_data["task_ids"] = torch.randint(0, kwargs["n_tasks"], (shape[0],))
+        return in_data
+
     def get_model(self):
         """Builds a model instance for testing based on the provided model configuration."""
         if self.model_class == AutoAdapterModel:
@@ -91,13 +93,13 @@ class TextAdapterTestBase(AbstractAdapterTestBase):
 
     def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs):
         shape = shape or self.input_shape
+        in_data = super().get_input_samples(shape, vocab_size, config, **kwargs)
         input_ids = self.build_rand_ids_tensor(shape, vocab_size=vocab_size)
 
-        # Ensures that only the last token in each sample is the eos token (needed e.g. for BART)
         if config and config.eos_token_id is not None and config.eos_token_id < vocab_size:
             input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1)
             input_ids[:, -1] = config.eos_token_id
-        in_data = {"input_ids": input_ids}
+        in_data["input_ids"] = input_ids
 
         # Add decoder input ids for models with a decoder
         if config and config.is_encoder_decoder:
@@ -105,6 +107,7 @@ def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs):
 
         if "num_labels" in kwargs:
             in_data["labels"] = self.build_rand_ids_tensor(shape[:-1], vocab_size=kwargs["num_labels"])
+
         return in_data
 
     def add_head(self, model, name, **kwargs):
@@ -118,7 +121,9 @@ def get_dataset(self, tokenizer=None):
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
         data_args = GlueDataTrainingArguments(
-            task_name="mrpc", data_dir="./hf_transformers/tests/fixtures/tests_samples/MRPC", overwrite_cache=True
+            task_name="mrpc",
+            data_dir="./hf_transformers/tests/fixtures/tests_samples/MRPC",
+            overwrite_cache=True,
         )
         return GlueDataset(data_args, tokenizer=tokenizer, mode="train")
 
@@ -143,8 +148,10 @@ class VisionAdapterTestBase(AbstractAdapterTestBase):
 
     def get_input_samples(self, shape=None, config=None, dtype=torch.float, **kwargs):
         shape = shape or self.input_shape
+        in_data = super().get_input_samples(shape, config=config, **kwargs)
         pixel_values = self.build_rand_tensor(shape, dtype=dtype)
-        return {"pixel_values": pixel_values}
+        in_data["pixel_values"] = pixel_values
+        return in_data
 
     def add_head(self, model, name, **kwargs):
         kwargs["num_labels"] = 10 if "num_labels" not in kwargs else kwargs["num_labels"]
@@ -198,7 +205,8 @@ def add_head(self, model, name, head_type="seq2seq_lm", **kwargs):
 
     def get_input_samples(self, shape=None, config=None, **kwargs):
         shape = shape or self.input_shape
-        in_data = {"input_features": self.build_rand_tensor(shape, dtype=torch.float)}
+        in_data = super().get_input_samples(shape, config=config, **kwargs)
+        in_data["input_features"] = self.build_rand_tensor(shape, dtype=torch.float)
 
         # Add decoder input ids for models with a decoder
         if config and config.is_encoder_decoder:
diff --git a/tests/test_methods/method_test_impl/base.py b/tests/test_methods/method_test_impl/base.py
index 84ed23432..379bcbf16 100644
--- a/tests/test_methods/method_test_impl/base.py
+++ b/tests/test_methods/method_test_impl/base.py
@@ -157,16 +157,16 @@ def run_get_test(self, model, adapter_config, num_expected_modules):
 
         model.delete_adapter("first")
 
-    def run_forward_test(self, model, adapter_config, dtype=torch.float32, adapter_setup=None):
+    def run_forward_test(self, model, adapter_config, dtype=torch.float32, **kwargs):
         model.eval()
 
         name = adapter_config.__class__.__name__
-        adapter_setup = adapter_setup or name
+        adapter_setup = kwargs.get("adapter_setup") or name
         if name not in model.adapters_config:
             model.add_adapter(name, config=adapter_config)
         model.to(torch_device).to(dtype)
 
-        input_data = self.get_input_samples(config=model.config, dtype=dtype)
+        input_data = self.get_input_samples(config=model.config, dtype=dtype, **kwargs)
 
         # pass 1: set adapter via property
         model.set_active_adapters(adapter_setup)
@@ -192,7 +192,7 @@ def run_forward_test(self, model, adapter_config, dtype=torch.float32, adapter_s
         model.set_active_adapters(None)
         model.delete_adapter(name)
 
-    def run_load_test(self, adapter_config):
+    def run_load_test(self, adapter_config, **kwargs):
         model1, model2 = create_twin_models(self.model_class, self.config)
 
         name = "dummy_adapter"
@@ -221,7 +221,7 @@ def run_load_test(self, adapter_config):
         self.assertTrue(name in model2.adapters_config)
 
         # check equal output
-        input_data = self.get_input_samples(config=model1.config)
+        input_data = self.get_input_samples(config=model1.config, **kwargs)
         model1.to(torch_device)
         model2.to(torch_device)
         output1 = model1(**input_data)
diff --git a/tests/test_methods/test_on_clip/test_model.py b/tests/test_methods/test_on_clip/test_model.py
index 6bff937bc..56c09b7cf 100644
--- a/tests/test_methods/test_on_clip/test_model.py
+++ b/tests/test_methods/test_on_clip/test_model.py
@@ -36,6 +36,7 @@ class CLIPAdapterTestBase(TextAdapterTestBase):
     def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **kwargs):
         # text inputs
         shape = self.default_text_input_samples_shape
+        in_data = super().get_input_samples(shape, vocab_size, config, **kwargs)
        total_dims = 1
         for dim in shape:
             total_dims *= dim
@@ -47,7 +48,7 @@ def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **k
         if config and config.eos_token_id is not None and config.eos_token_id < vocab_size:
             input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1)
             input_ids[:, -1] = config.eos_token_id
-        in_data = {"input_ids": input_ids}
+        in_data["input_ids"] = input_ids
 
         # vision inputs
         shape = self.default_vision_input_samples_shape
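
For illustration, a minimal sketch of what the patched base get_input_samples produces when a test forwards an n_tasks keyword. It uses plain torch outside the test suite; make_dummy_batch, the batch size of 3, and the sequence length of 64 are hypothetical stand-ins for the repository's helpers, not identifiers from the patch:

    import torch

    def make_dummy_batch(shape=(3, 64), vocab_size=5000, **kwargs):
        # Mirrors the new AbstractAdapterTestBase.get_input_samples: when the
        # caller passes n_tasks, one task id per sample (shape[0]) is added.
        in_data = {}
        if "n_tasks" in kwargs:
            in_data["task_ids"] = torch.randint(0, kwargs["n_tasks"], (shape[0],))
        # Subclasses then add their modality-specific inputs, e.g. input_ids for text.
        in_data["input_ids"] = torch.randint(0, vocab_size, shape)
        return in_data

    batch = make_dummy_batch(n_tasks=4)
    print(batch["task_ids"].shape)   # torch.Size([3]): one task id per sample
    print(batch["input_ids"].shape)  # torch.Size([3, 64])

Because run_forward_test and run_load_test now forward **kwargs to get_input_samples, a multi-task test can pass the keyword through, e.g. self.run_forward_test(model, adapter_config, n_tasks=4), and the generated dummy batch will contain matching task_ids.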