interveners.py
import torch
import torch.nn as nn


def wrapper(intervener):
    """Wrap a stateful intervener object in a plain callable interface."""
    def wrapped(*args, **kwargs):
        return intervener(*args, **kwargs)
    return wrapped
class Collector:
    collect_state = True
    collect_action = False

    def __init__(self, multiplier, head):
        # `multiplier` is accepted for interface compatibility but unused here.
        self.head = head
        self.states = []
        self.actions = []

    def reset(self):
        self.states = []
        self.actions = []

    def __call__(self, b, s):
        # b is (batch_size, seq_len, #key_value_heads x D_head); store only the
        # last-token activation, optionally sliced to a single attention head.
        if self.head == -1:
            self.states.append(b[0, -1].detach().clone())
        else:
            # The hard-coded 32 is the assumed number of attention heads.
            self.states.append(b[0, -1].reshape(32, -1)[self.head].detach().clone())
        return b
class ITI_Intervener:
    collect_state = True
    collect_action = True
    attr_idx = -1

    def __init__(self, direction, multiplier):
        if not isinstance(direction, torch.Tensor):
            direction = torch.tensor(direction)
        self.direction = direction.cuda().half()
        self.multiplier = multiplier
        self.states = []
        self.actions = []

    def reset(self):
        self.states = []
        self.actions = []

    def __call__(self, b, s):
        # b is (batch_size=1, seq_len, #heads x D_head); the stored state is the
        # last-token slice of shape (#heads x D_head).
        self.states.append(b[0, -1].detach().clone())
        action = self.direction.to(b.device)
        self.actions.append(action.detach().clone())
        # Shift the last-token activation along the intervention direction.
        b[0, -1] = b[0, -1] + action * self.multiplier
        return b
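

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It exercises the two
# interveners on a dummy hidden-state tensor, assuming a model with 32
# attention heads of dimension 128 (flattened activation width 4096). The
# tensor values and the multiplier are placeholders; in actual use, __call__
# would be invoked by whatever hooking mechanism wraps the model's attention
# outputs.
if __name__ == "__main__":
    n_heads, d_head, seq_len = 32, 128, 8
    hidden = torch.randn(1, seq_len, n_heads * d_head)

    # Collect the last-token activation of a single head (multiplier is unused).
    collector = Collector(multiplier=None, head=5)
    collect_fn = wrapper(collector)
    collect_fn(hidden, None)
    print(collector.states[0].shape)  # torch.Size([128])

    # ITI_Intervener moves the direction to the GPU in __init__, so this part
    # is guarded on CUDA availability.
    if torch.cuda.is_available():
        direction = torch.randn(n_heads * d_head)
        intervener = ITI_Intervener(direction, multiplier=15.0)
        intervene_fn = wrapper(intervener)
        shifted = intervene_fn(hidden.clone().cuda(), None)
        print(shifted.shape)  # torch.Size([1, 8, 4096])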