From 8a5c99c748897a30c2cd2cd86006f2936abab8f9 Mon Sep 17 00:00:00 2001 From: calpt Date: Sat, 22 Jun 2024 17:58:19 +0000 Subject: [PATCH 1/4] Fix moving adapter head to device. Minor update for GLUE example. --- .../pytorch/text-classification/run_glue.py | 22 +++++++++---------- src/adapters/heads/model_mixin.py | 16 ++++++++++++++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 5786e0df55..4ed66c8aae 100644 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -189,12 +189,12 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) @@ -277,7 +277,7 @@ def main(): "glue", data_args.task_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. @@ -285,7 +285,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading a dataset from your local files. @@ -314,7 +314,7 @@ def main(): "csv", data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading a dataset from local json files @@ -322,7 +322,7 @@ def main(): "json", data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -357,14 +357,14 @@ def main(): finetuning_task=data_args.task_name, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # We use the AutoAdapterModel class here for better adapter support. 
     model = AutoAdapterModel.from_pretrained(
@@ -373,7 +373,7 @@ def main():
         config=config,
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
+        token=model_args.token,
         ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
     )

diff --git a/src/adapters/heads/model_mixin.py b/src/adapters/heads/model_mixin.py
index 4e0dfde84b..fb6389f688 100644
--- a/src/adapters/heads/model_mixin.py
+++ b/src/adapters/heads/model_mixin.py
@@ -674,6 +674,22 @@ def get_labels(self, head_name=None):
         else:
             return list(label_dict.values())

+    def adapter_to(
+        self, name: str, device: Optional[Union[torch.device, str]] = None, dtype: Optional[torch.dtype] = None
+    ):
+        """
+        Moves the adapter with the given name to the specified device and data type.
+
+        Args:
+            name (str): The name of the adapter to be moved.
+            device (torch.device or str, optional): The device to which the adapter should be moved.
+            dtype (torch.dtype, optional): The data type to which the adapter should be cast.
+        """
+        super().adapter_to(name, device, dtype)
+        # Move heads to correct device
+        if name in self.heads:
+            self.heads[name].to(device=device, dtype=dtype)
+
     # This method is called during model loading in from_pretrained() to apply the state_dict to the model.
     # Override it to inject adapter head logic.
     @classmethod

From af631fbcdafffe3879326afef425b0d376b82781 Mon Sep 17 00:00:00 2001
From: calpt
Date: Sun, 23 Jun 2024 10:52:36 +0200
Subject: [PATCH 2/4] Add example tests to CI

---
 .github/workflows/tests_torch.yml     | 24 +++++++
 examples/pytorch/test_xla_examples.py | 94 ---------------------------
 examples/pytorch/xla_spawn.py         | 83 -----------------------
 3 files changed, 24 insertions(+), 177 deletions(-)
 delete mode 100644 examples/pytorch/test_xla_examples.py
 delete mode 100644 examples/pytorch/xla_spawn.py

diff --git a/.github/workflows/tests_torch.yml b/.github/workflows/tests_torch.yml
index 215d695dd8..cf61485852 100644
--- a/.github/workflows/tests_torch.yml
+++ b/.github/workflows/tests_torch.yml
@@ -90,3 +90,27 @@ jobs:
       - name: Test
         run: |
           make test-adapter-models
+  test_adapter_examples:
+    timeout-minutes: 60
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true
+      - uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+      - name: Install
+        run: |
+          pip install torch==2.1.2
+          pip install .[sklearn,testing,sentencepiece]
+          pip install conllu
+      - name: Test Examples
+        run: |
+          make test-examples
diff --git a/examples/pytorch/test_xla_examples.py b/examples/pytorch/test_xla_examples.py
deleted file mode 100644
index 4a29ce3bee..0000000000
--- a/examples/pytorch/test_xla_examples.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# coding=utf-8
-# Copyright 2018 HuggingFace Inc..
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- - -import json -import logging -import os -import sys -from time import time -from unittest.mock import patch - -from transformers.testing_utils import TestCasePlus, require_torch_tpu - - -logging.basicConfig(level=logging.DEBUG) - -logger = logging.getLogger() - - -def get_results(output_dir): - results = {} - path = os.path.join(output_dir, "all_results.json") - if os.path.exists(path): - with open(path, "r") as f: - results = json.load(f) - else: - raise ValueError(f"can't find {path}") - return results - - -stream_handler = logging.StreamHandler(sys.stdout) -logger.addHandler(stream_handler) - - -@require_torch_tpu -class TorchXLAExamplesTests(TestCasePlus): - def test_run_glue(self): - import xla_spawn - - tmp_dir = self.get_auto_remove_tmp_dir() - testargs = f""" - ./examples/pytorch/text-classification/run_glue.py - --num_cores=8 - ./examples/pytorch/text-classification/run_glue.py - --model_name_or_path distilbert-base-uncased - --output_dir {tmp_dir} - --overwrite_output_dir - --train_file ./tests/fixtures/tests_samples/MRPC/train.csv - --validation_file ./tests/fixtures/tests_samples/MRPC/dev.csv - --do_train - --do_eval - --debug tpu_metrics_debug - --per_device_train_batch_size=2 - --per_device_eval_batch_size=1 - --learning_rate=1e-4 - --max_steps=10 - --warmup_steps=2 - --seed=42 - --max_seq_length=128 - """.split() - - with patch.object(sys, "argv", testargs): - start = time() - xla_spawn.main() - end = time() - - result = get_results(tmp_dir) - self.assertGreaterEqual(result["eval_accuracy"], 0.75) - - # Assert that the script takes less than 500 seconds to make sure it doesn't hang. - self.assertLess(end - start, 500) - - def test_trainer_tpu(self): - import xla_spawn - - testargs = """ - ./tests/test_trainer_tpu.py - --num_cores=8 - ./tests/test_trainer_tpu.py - """.split() - with patch.object(sys, "argv", testargs): - xla_spawn.main() diff --git a/examples/pytorch/xla_spawn.py b/examples/pytorch/xla_spawn.py deleted file mode 100644 index 5df6bfa2d5..0000000000 --- a/examples/pytorch/xla_spawn.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2020 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -A simple launcher script for TPU training - -Inspired by https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py - -:: - >>> python xla_spawn.py --num_cores=NUM_CORES_YOU_HAVE - YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other - arguments of your training script) - -""" - - -import importlib -import sys -from argparse import REMAINDER, ArgumentParser -from pathlib import Path - -import torch_xla.distributed.xla_multiprocessing as xmp - - -def parse_args(): - """ - Helper function parsing the command line options - @retval ArgumentParser - """ - parser = ArgumentParser( - description=( - "PyTorch TPU distributed training launch helper utility that will spawn up multiple distributed processes" - ) - ) - - # Optional arguments for the launch helper - parser.add_argument("--num_cores", type=int, default=1, help="Number of TPU cores to use (1 or 8).") - - # positional - parser.add_argument( - "training_script", - type=str, - help=( - "The full path to the single TPU training " - "program/script to be launched in parallel, " - "followed by all the arguments for the " - "training script" - ), - ) - - # rest from the training program - parser.add_argument("training_script_args", nargs=REMAINDER) - - return parser.parse_args() - - -def main(): - args = parse_args() - - # Import training_script as a module. - script_fpath = Path(args.training_script) - sys.path.append(str(script_fpath.parent.resolve())) - mod_name = script_fpath.stem - mod = importlib.import_module(mod_name) - - # Patch sys.argv - sys.argv = [args.training_script] + args.training_script_args + ["--tpu_num_cores", str(args.num_cores)] - - xmp.spawn(mod._mp_fn, args=(), nprocs=args.num_cores) - - -if __name__ == "__main__": - main() From 1fd036340f0c34b4c22fb8655bab906e58135678 Mon Sep 17 00:00:00 2001 From: calpt Date: Sun, 23 Jun 2024 11:54:26 +0200 Subject: [PATCH 3/4] Update examples --- .github/workflows/tests_torch.yml | 2 +- .../pytorch/dependency-parsing/run_udp.py | 12 +--- examples/pytorch/multiple-choice/run_swag.py | 66 ++++++++++++------- examples/pytorch/test_adapter_examples.py | 4 +- 4 files changed, 47 insertions(+), 37 deletions(-) diff --git a/.github/workflows/tests_torch.yml b/.github/workflows/tests_torch.yml index cf61485852..0b2f775fbe 100644 --- a/.github/workflows/tests_torch.yml +++ b/.github/workflows/tests_torch.yml @@ -109,7 +109,7 @@ jobs: - name: Install run: | pip install torch==2.1.2 - pip install .[sklearn,testing,sentencepiece] + pip install .[sklearn,testing,sentencepiece,seqeval] pip install conllu - name: Test Examples run: | diff --git a/examples/pytorch/dependency-parsing/run_udp.py b/examples/pytorch/dependency-parsing/run_udp.py index 8fefe1f49c..43bcaff6a4 100644 --- a/examples/pytorch/dependency-parsing/run_udp.py +++ b/examples/pytorch/dependency-parsing/run_udp.py @@ -80,7 +80,6 @@ class DataTrainingArguments: default=False, metadata={"help": "Overwrite the cached training and evaluation sets."}, ) - use_mock_data: bool = field(default=False) evaluate_on: str = field(default="validation") @@ -180,16 +179,7 @@ def main(): ) # Load and preprocess dataset - if data_args.use_mock_data: - from datasets import Version, load_dataset_builder - from datasets.commands.dummy_data import MockDownloadManager - - dataset_builder = load_dataset_builder("universal_dependencies", data_args.task_name) - mock_dl_manager = MockDownloadManager("universal_dependencies", dataset_builder.config, Version("2.7.0")) - 
dataset_builder.download_and_prepare(dl_manager=mock_dl_manager, ignore_verifications=True) - dataset = dataset_builder.as_dataset() - else: - dataset = load_dataset("universal_dependencies", data_args.task_name) + dataset = load_dataset("universal_dependencies", data_args.task_name, trust_remote_code=True) dataset = preprocess_dataset(dataset, tokenizer, labels, data_args, pad_token_id=-1) # Setup adapters diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index aa321cfc35..d5e721c3ba 100644 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -81,12 +81,22 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( + token: str = field( + default=None, + metadata={ + "help": ( + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." + ) + }, + ) + trust_remote_code: bool = field( default=False, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option " + "should only be set to `True` for repositories you trust and in which you have read the code, as it will " + "execute code present on the Hub on your local machine." ) }, ) @@ -235,6 +245,11 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) + + if training_args.should_log: + # The default of training_args.log_level is passive, so we set log level at info here to have that default. + transformers.utils.logging.set_verbosity_info() + log_level = training_args.get_process_log_level() logger.setLevel(log_level) datasets.utils.logging.set_verbosity(log_level) @@ -244,8 +259,8 @@ def main(): # Log on each process the small summary: logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " + + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}" ) logger.info(f"Training/evaluation parameters {training_args}") @@ -280,14 +295,15 @@ def main(): data_files = {} if data_args.train_file is not None: data_files["train"] = data_args.train_file + extension = data_args.train_file.split(".")[-1] if data_args.validation_file is not None: data_files["validation"] = data_args.validation_file - extension = data_args.train_file.split(".")[-1] + extension = data_args.validation_file.split(".")[-1] raw_datasets = load_dataset( extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Downloading and loading the swag dataset from the hub. 
@@ -295,10 +311,10 @@ def main(): "swag", "regular", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at - # https://huggingface.co/docs/datasets/loading_datasets.html. + # https://huggingface.co/docs/datasets/loading_datasets. # Load pretrained model and tokenizer @@ -309,14 +325,16 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, + trust_remote_code=model_args.trust_remote_code, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, + trust_remote_code=model_args.trust_remote_code, ) model = AutoModelForMultipleChoice.from_pretrained( model_args.model_name_or_path, @@ -324,7 +342,8 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, + trust_remote_code=model_args.trust_remote_code, ) # Convert the model into an adapter model @@ -339,14 +358,15 @@ def main(): max_seq_length = tokenizer.model_max_length if max_seq_length > 1024: logger.warning( - f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). " - "Picking 1024 instead. You can change that default value by passing --max_seq_length xxx." + "The chosen tokenizer supports a `model_max_length` that is longer than the default `block_size` value" + " of 1024. If you would like to use a longer `block_size` up to `tokenizer.model_max_length` you can" + " override this default with `--block_size xxx`." ) max_seq_length = 1024 else: if data_args.max_seq_length > tokenizer.model_max_length: logger.warning( - f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" + f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the " f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." 
) max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) @@ -462,14 +482,14 @@ def compute_metrics(eval_predictions): trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) - kwargs = dict( - finetuned_from=model_args.model_name_or_path, - tasks="multiple-choice", - dataset_tags="swag", - dataset_args="regular", - dataset="SWAG", - language="en", - ) + kwargs = { + "finetuned_from": model_args.model_name_or_path, + "tasks": "multiple-choice", + "dataset_tags": "swag", + "dataset_args": "regular", + "dataset": "SWAG", + "language": "en", + } if training_args.push_to_hub: trainer.push_to_hub(**kwargs) diff --git a/examples/pytorch/test_adapter_examples.py b/examples/pytorch/test_adapter_examples.py index 1b4bd98486..2ea851902b 100644 --- a/examples/pytorch/test_adapter_examples.py +++ b/examples/pytorch/test_adapter_examples.py @@ -158,7 +158,7 @@ def test_run_swag_adapter(self): --validation_file ./tests/fixtures/samples/swag/sample.json --output_dir {tmp_dir} --overwrite_output_dir - --max_steps=20 + --max_steps=40 --warmup_steps=2 --do_train --do_eval @@ -364,6 +364,7 @@ def test_run_ner_adapter(self): self.assertGreaterEqual(result["eval_precision"], 0.75) self.assertLess(result["eval_loss"], 0.5) + @slow def test_run_udp_adapter(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -375,7 +376,6 @@ def test_run_udp_adapter(self): --do_train --do_eval --task_name en_ewt - --use_mock_data --evaluate_on train --per_device_train_batch_size=2 --per_device_eval_batch_size=1 From faa586f4bc8d2894f5501df1ce86aa1806346062 Mon Sep 17 00:00:00 2001 From: calpt Date: Sun, 23 Jun 2024 12:10:41 +0200 Subject: [PATCH 4/4] fix --- .github/workflows/tests_torch.yml | 4 ++-- examples/pytorch/multiple-choice/run_swag.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests_torch.yml b/.github/workflows/tests_torch.yml index 0b2f775fbe..668beb9e62 100644 --- a/.github/workflows/tests_torch.yml +++ b/.github/workflows/tests_torch.yml @@ -109,8 +109,8 @@ jobs: - name: Install run: | pip install torch==2.1.2 - pip install .[sklearn,testing,sentencepiece,seqeval] - pip install conllu + pip install .[sklearn,testing,sentencepiece] + pip install conllu seqeval - name: Test Examples run: | make test-examples diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index d5e721c3ba..c3aaa64b23 100644 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -94,9 +94,9 @@ class ModelArguments: default=False, metadata={ "help": ( - "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option " - "should only be set to `True` for repositories you trust and in which you have read the code, as it will " - "execute code present on the Hub on your local machine." + "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option" + " should only be set to `True` for repositories you trust and in which you have read the code, as it" + " will execute code present on the Hub on your local machine." 
) }, ) @@ -260,7 +260,8 @@ def main(): # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " - + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}" + + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training:" + f" {training_args.fp16}" ) logger.info(f"Training/evaluation parameters {training_args}")
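
A minimal usage sketch of the `adapter_to` override added in PATCH 1/4, assuming
the adapters library with these patches applied. The checkpoint and the
adapter/head name below ("bert-base-uncased", "mrpc") are illustrative
stand-ins, not names taken from the patches:

    # Sketch only: assumes adapters with PATCH 1/4 applied; "mrpc" is an
    # arbitrary example name for a paired adapter + prediction head.
    import torch

    from adapters import AutoAdapterModel

    model = AutoAdapterModel.from_pretrained("bert-base-uncased")
    model.add_adapter("mrpc")
    model.add_classification_head("mrpc", num_labels=2)
    model.set_active_adapters("mrpc")

    # Before this fix, adapter_to() moved only the adapter modules and left a
    # prediction head sharing the adapter's name on its old device/dtype. With
    # the override in src/adapters/heads/model_mixin.py, the head follows:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.adapter_to("mrpc", device=device, dtype=torch.float32)

    # The head's parameters now match the adapter weights:
    print(next(model.heads["mrpc"].parameters()).device)

Note that adapter_to() does not move the base model; in practice the full model
is placed first (e.g. model.to(device)) and adapter_to() is used to bring
individually added or loaded adapters and their heads in line.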