Merge branch 'main' into sync/v4.36.0
calpt authored Jan 13, 2024
2 parents a96a49f + 5c5f10c commit 86309c1
Showing 12 changed files with 54 additions and 101 deletions.
5 changes: 5 additions & 0 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
blank_issues_enabled: true
contact_links:
- name: "🗪 Discussions Forum"
url: https://github.com/adapter-hub/adapters/discussions
about: Ask questions on working with adapters, request help or share your work
21 changes: 0 additions & 21 deletions .github/ISSUE_TEMPLATE/question-help.md

This file was deleted.

38 changes: 0 additions & 38 deletions .github/workflows/pr_dependencies.yml

This file was deleted.

2 changes: 1 addition & 1 deletion LICENSE
@@ -1,4 +1,4 @@
Copyright 2020-2023 The AdapterHub Team. All rights reserved.
Copyright 2020-2024 The AdapterHub Team. All rights reserved.

Apache License
Version 2.0, January 2004
1 change: 0 additions & 1 deletion README.md
@@ -46,7 +46,6 @@ pip install -U adapters

```
git clone https://github.com/adapter-hub/adapters.git
git checkout adapters
cd adapters
pip install .
```
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -20,7 +20,7 @@
# -- Project information -----------------------------------------------------

project = "AdapterHub"
copyright = "2020-2023, AdapterHub Team"
copyright = "2020-2024, AdapterHub Team"
author = "AdapterHub Team"

docs_versions = [
2 changes: 1 addition & 1 deletion setup.py
@@ -142,7 +142,7 @@ def deps_list(*pkgs):

setup(
name="adapters",
version="0.1.0",
version="0.1.1",
author="The AdapterHub team and community contributors",
author_email="[email protected]",
description="A Unified Library for Parameter-Efficient and Modular Transfer Learning",
2 changes: 1 addition & 1 deletion src/adapters/__init__.py
@@ -16,7 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.1.0"
__version__ = "0.1.1"

from typing import TYPE_CHECKING

48 changes: 31 additions & 17 deletions src/adapters/heads/base.py
@@ -18,7 +18,13 @@
)
from transformers.utils import ModelOutput

from ..composition import AdapterCompositionBlock, BatchSplit, Parallel, parse_heads_from_composition
from ..composition import (
AdapterCompositionBlock,
BatchSplit,
Parallel,
adjust_tensors_for_parallel,
parse_heads_from_composition,
)
from ..context import AdapterSetup, ForwardContext
from ..loading import PredictionHeadLoader
from ..methods.modeling import Activation_Function_Class
@@ -105,6 +111,21 @@ def get_output_embeddings(self):
def get_label_names(self):
return ["labels"]

def _get_cls_output(self, outputs, **kwargs):
if self.config["use_pooler"]:
cls_output = kwargs.pop("pooled_output")
elif kwargs.get("get_cls_from_eos_tokens", False):
x = outputs[0] # last hidden state
eos_mask = kwargs.get("eos_mask")
(eos_mask,) = adjust_tensors_for_parallel(x, eos_mask)
if len(torch.unique(eos_mask.sum(1))) > 1:
raise ValueError("All examples must have the same number of <eos> tokens.")
cls_output = x[eos_mask, :].view(x.size(0), -1, x.size(-1))[:, -1, :]
else:
cls_output = outputs[0][:, 0]

return cls_output


class ClassificationHead(PredictionHead):
def __init__(
@@ -134,10 +155,7 @@ def __init__(

def forward(self, outputs, cls_output=None, attention_mask=None, return_dict=False, **kwargs):
if cls_output is None:
if self.config["use_pooler"]:
cls_output = kwargs.pop("pooled_output")
else:
cls_output = outputs[0][:, 0]
cls_output = self._get_cls_output(outputs, **kwargs)
logits = super().forward(cls_output)
loss = None
labels = kwargs.pop("labels", None)
@@ -205,10 +223,7 @@ def __init__(

def forward(self, outputs, cls_output=None, attention_mask=None, return_dict=False, **kwargs):
if cls_output is None:
if self.config["use_pooler"]:
cls_output = kwargs.pop("pooled_output")
else:
cls_output = outputs[0][:, 0]
cls_output = self._get_cls_output(outputs, **kwargs)
logits = super().forward(cls_output)
loss = None
labels = kwargs.pop("labels", None)
@@ -271,10 +286,7 @@ def __init__(

def forward(self, outputs, cls_output=None, attention_mask=None, return_dict=None, **kwargs):
if cls_output is None:
if self.config["use_pooler"]:
cls_output = kwargs.pop("pooled_output")
else:
cls_output = outputs[0][:, 0]
cls_output = self._get_cls_output(outputs, **kwargs)
logits = super().forward(cls_output)
logits = logits.view(-1, self.config["num_choices"])
loss = None
@@ -476,10 +488,7 @@ def __init__(

def forward(self, outputs, cls_output=None, attention_mask=None, return_dict=False, **kwargs):
if cls_output is None:
if self.config["use_pooler"]:
cls_output = kwargs.pop("pooled_output")
else:
cls_output = outputs[0][:, 0]
cls_output = self._get_cls_output(outputs, **kwargs)
logits = super().forward(cls_output)
loss = None
labels = kwargs.pop("labels", None)
@@ -800,6 +809,9 @@ def forward_head(
cls_output (torch.Tensor, optional): The classification output of the model.
attention_mask (torch.Tensor, optional): The attention mask of the model.
return_dict (bool): Whether or not to return a ``ModelOutput`` instead of a plain tuple.
get_cls_from_eos_tokens (bool):
If set to True, retrieve classifier token representations from the last <eos> token in the sequence.
Setting to True requires `eos_mask` to be passed as well.
**kwargs: Additional keyword arguments passed to the forward pass of the head.
"""
used_head_modules = self._get_used_heads(head_name)
@@ -846,10 +858,12 @@ def _get_head_input(outputs, cls_out, batch):
)
head_outputs = []
labels = kwargs.pop("labels", None)
eos_mask = kwargs.pop("eos_mask", None)
for i, head in enumerate(self.active_head):
head_module = self.heads[head]
batch_idx = range(sum(self.active_head.batch_sizes[:i]), sum(self.active_head.batch_sizes[: i + 1]))
kwargs["labels"] = labels[batch_idx] if labels is not None else None
kwargs["eos_mask"] = eos_mask[batch_idx] if eos_mask is not None else None
head_inputs, head_cls_input = _get_head_input(all_outputs, cls_output, batch_idx)
# head_attention = attention_mask[batch_idx] if attention_mask is not None else None
head_output = head_module(head_inputs, head_cls_input, attention_mask, return_dict, **kwargs)
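Note: the new `_get_cls_output` helper centralizes how prediction heads pick their classifier representation, including the eos-token pooling previously duplicated in the BART adapter model (see further below). The following is a minimal, self-contained sketch of that pooling when `get_cls_from_eos_tokens=True`; the shapes, token ids, and the `<eos>` id are made up for illustration only.

```
import torch

batch_size, seq_len, hidden = 2, 6, 4
x = torch.randn(batch_size, seq_len, hidden)      # last hidden state
input_ids = torch.tensor([[5, 8, 9, 2, 1, 1],
                          [7, 3, 2, 1, 1, 1]])    # assume 2 = <eos>, 1 = <pad>
eos_mask = input_ids.eq(2)                        # boolean mask of <eos> positions

# As in the diff: every example must contain the same number of <eos> tokens.
assert len(torch.unique(eos_mask.sum(1))) == 1

# Gather all <eos> positions, then keep the representation of the last one per example.
cls_output = x[eos_mask, :].view(x.size(0), -1, x.size(-1))[:, -1, :]
print(cls_output.shape)  # torch.Size([2, 4])
```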
14 changes: 10 additions & 4 deletions src/adapters/methods/prefix_tuning.py
@@ -21,19 +21,20 @@ def __init__(
n_heads: int,
input_size: int,
config: PrefixTuningConfig,
n_embd_per_head: Optional[int] = None,
):
super().__init__()
self.n_layers = n_layers
self.n_heads = n_heads
self.input_size = input_size
self.n_embd_per_head = self.input_size // self.n_heads
self.n_embd_per_head = n_embd_per_head or self.input_size // self.n_heads
self.config = config

self.wte = nn.Embedding(self.config.prefix_length, self.input_size)
self.control_trans = nn.Sequential(
nn.Linear(self.input_size, self.config.bottleneck_size),
Activation_Function_Class(self.config.non_linearity.lower()),
nn.Linear(self.config.bottleneck_size, self.n_layers * 2 * self.input_size),
nn.Linear(self.config.bottleneck_size, self.n_layers * 2 * self.n_heads * self.n_embd_per_head),
)
self.dropout = nn.Dropout(self.config.dropout)

@@ -70,15 +71,18 @@ def __init__(
n_heads: int,
input_size: int,
config: PrefixTuningConfig,
n_embd_per_head: Optional[int] = None,
):
super().__init__()
self.n_layers = n_layers
self.n_heads = n_heads
self.input_size = input_size
self.n_embd_per_head = self.input_size // self.n_heads
self.n_embd_per_head = n_embd_per_head or self.input_size // self.n_heads
self.config = config

self.control_trans = nn.Parameter(torch.randn(self.config.prefix_length * self.n_layers * 2 * self.input_size))
self.control_trans = nn.Parameter(
torch.randn(self.config.prefix_length * self.n_layers * 2 * self.n_heads * self.n_embd_per_head)
)

self.dropout = nn.Dropout(self.config.dropout)

@@ -174,6 +178,7 @@ def confirm_prefix(self, prefix_name: str) -> bool:
"n_layers": location_config["count"],
"n_heads": location_config["n_heads"],
"input_size": location_config["input_size"],
"n_embd_per_head": location_config["n_embd_per_head"],
}
prefix_tuning = PrefixTuningGroup(module_configs, prefix_tuning_config)
prefix_tuning.train(self.training) # make sure training mode is consistent
@@ -319,6 +324,7 @@ def add_adapter(self, adapter_name: str, layer_idx: int) -> bool:
self.location_key,
n_heads=self.model_config.num_attention_heads,
input_size=self.model_config.hidden_size,
n_embd_per_head=getattr(self.model_config, "d_kv", None), # this is currently specific to T5-3B
)
self.prefixes[adapter_name] = prefix_id

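Note: the new optional `n_embd_per_head` argument lets prefix tuning size its key/value projections from the model config instead of assuming `hidden_size // num_heads`. A rough arithmetic sketch of the difference, using assumed T5-3B-like dimensions that are not taken from this diff:

```
# Assumed, illustrative dimensions for a model where the key/value size per head
# (d_kv) is not hidden_size // num_heads, as in T5-3B.
hidden_size, n_heads, d_kv, n_layers = 1024, 32, 128, 24

derived = hidden_size // n_heads         # 32 -> what the old code implicitly assumed
out_old = n_layers * 2 * hidden_size     # 49152 values per prefix token (old projection size)
out_new = n_layers * 2 * n_heads * d_kv  # 196608 -> matches the attention layers' key/value size

print(derived, out_old, out_new)
```

With the old derivation, the generated prefixes could not match the shape of key/value projections whenever `n_heads * d_kv != hidden_size`, which is why the commit threads `n_embd_per_head` (read from `d_kv` where available) through `PrefixTuning`, `FlatPrefixTuning`, and the prefix pool.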
5 changes: 1 addition & 4 deletions src/adapters/model_mixin.py
@@ -134,10 +134,7 @@ def _get_active_setup(self):
adapter_setup = self.adapters_config.active_setup
else:
adapter_setup = None
skip_adapters = adapter_setup is None or (
self.adapters_config.skip_layers is not None and self.layer_idx in self.adapters_config.skip_layers
)
if not skip_adapters and (len(adapter_setup.flatten()) > 0):
if adapter_setup is not None and (len(adapter_setup.flatten()) > 0):
return adapter_setup
else:
return None
15 changes: 3 additions & 12 deletions src/adapters/models/bart/adapter_model.py
@@ -10,7 +10,6 @@
)
from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward

from ...composition import adjust_tensors_for_parallel
from ...heads import (
ClassificationHead,
ModelWithFlexibleHeadsAdaptersMixin,
@@ -102,23 +101,15 @@ def forward(
)
# required e.g. for prompt tuning in all models
kwargs["context"] = context
# sequence classification based on last token in sequence
x = outputs[0] # last hidden state
if input_ids is not None and x.shape[1] == input_ids.shape[1]:
eos_mask = input_ids.eq(self.config.eos_token_id)
(eos_mask,) = adjust_tensors_for_parallel(x, eos_mask)
if len(torch.unique(eos_mask.sum(1))) > 1:
raise ValueError("All examples must have the same number of <eos> tokens.")
cls_representation = x[eos_mask, :].view(x.size(0), -1, x.size(-1))[:, -1, :]
else:
cls_representation = x

head_outputs = self.forward_head(
outputs,
head_name=head,
cls_output=cls_representation,
attention_mask=attention_mask,
return_dict=return_dict,
get_cls_from_eos_tokens=True,
# `get_cls_from_eos_tokens` requires passing eos mask
eos_mask=input_ids.eq(self.config.eos_token_id) if input_ids is not None else None,
**kwargs,
)

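Note: the eos-based pooling that used to live in this forward pass is now requested from the shared head code via `get_cls_from_eos_tokens=True` plus an `eos_mask`. A hedged usage sketch follows; the checkpoint and head name are arbitrary choices for illustration, not part of this commit, and the exact output fields may differ by head type.

```
from adapters import BartAdapterModel
from transformers import AutoTokenizer

# Arbitrary checkpoint and head name, chosen only for illustration.
model = BartAdapterModel.from_pretrained("facebook/bart-base")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")
model.add_classification_head("dummy_task", num_labels=2)

inputs = tokenizer("Adapters make fine-tuning cheap.", return_tensors="pt")
# Internally, the adapter model now passes get_cls_from_eos_tokens=True and the
# <eos> mask to forward_head, which pools the last <eos> token before classifying.
outputs = model(**inputs)
print(outputs.logits.shape)  # expected: torch.Size([1, 2])
```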
