-
Notifications
You must be signed in to change notification settings - Fork 50
/
Copy pathloggingreporter.py
135 lines (103 loc) · 5.71 KB
/
loggingreporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from __future__ import print_function
import keras
import keras.backend as K
import numpy as np
from six.moves import cPickle
import os
import utils
class LoggingReporter(keras.callbacks.Callback):
    """Keras callback that pickles per-layer statistics for selected epochs.

    For every epoch accepted by ``do_save_func`` (or every epoch when it is
    ``None``) the callback writes ``<SAVE_DIR>/epoch%08d`` containing a dict
    with the keys ``'ACTIVATION'``, ``'epoch'``, ``'data'`` and ``'loss'``.
    ``'data'`` holds one entry per tracked layer (layers exposing a ``kernel``
    attribute) for each of:

    * ``'weights_norm'`` -- norm of the layer's kernel,
    * ``'gradmean'``     -- norm of the per-weight mean of the batch gradients,
    * ``'gradstd'``      -- norm of the per-weight std of the batch gradients,
    * ``'activity_tst'`` -- layer output on the test set (or on the combined
      dataset when ``cfg['FULL_MI']`` is true).

    Configuration keys read from ``cfg``: ``'SAVE_DIR'``, ``'SGD_BATCHSIZE'``,
    ``'ACTIVATION'`` and the optional ``'FULL_MI'``.
    """

    def __init__(self, cfg, trn, tst, do_save_func=None, *kargs, **kwargs):
        """Store the configuration, the datasets and the save predicate.

        Args:
            cfg: Configuration options dictionary. If ``'FULL_MI'`` is absent
                it is added to this same dictionary with the value ``False``.
            trn: Train data; must expose ``X`` and ``Y``.
            tst: Test data; must expose ``X`` and ``Y``.
            do_save_func: Optional callable; ``do_save_func(epoch)`` should
                return True if we should save on that epoch. ``None`` means
                every epoch is saved.
            *kargs, **kwargs: Forwarded to ``keras.callbacks.Callback``.
        """
        super(LoggingReporter, self).__init__(*kargs, **kwargs)
        self.cfg = cfg  # Configuration options dictionary
        self.trn = trn  # Train data
        self.tst = tst  # Test data

        if 'FULL_MI' not in cfg:
            # Whether to compute MI on train and test data, or just test
            self.cfg['FULL_MI'] = False

        if self.cfg['FULL_MI']:
            self.full = utils.construct_full_dataset(trn, tst)

        # do_save_func(epoch) should return True if we should save on that epoch
        self.do_save_func = do_save_func

    def on_train_begin(self, logs=None):
        """Create the save directory and build the backend functions.

        Populates ``layerixs``, ``layerfuncs`` and ``layerweights`` (one entry
        per layer that has a ``kernel`` attribute) and compiles
        ``get_gradients`` and ``get_loss``.

        Args:
            logs: Unused; accepted for compatibility with the Keras callback
                interface.
        """
        if not os.path.exists(self.cfg['SAVE_DIR']):
            print("Making directory", self.cfg['SAVE_DIR'])
            os.makedirs(self.cfg['SAVE_DIR'])

        # Indexes of the layers which we keep track of. Basically, this will be
        # any layer which has a 'kernel' attribute, which is essentially the
        # "Dense" or "Dense"-like layers
        self.layerixs = []

        # Functions return activity of each layer
        self.layerfuncs = []

        # Functions return weights of each layer
        self.layerweights = []
        for lndx, l in enumerate(self.model.layers):
            if hasattr(l, 'kernel'):
                self.layerixs.append(lndx)
                self.layerfuncs.append(K.function(self.model.inputs, [l.output, ]))
                self.layerweights.append(l.kernel)

        # NOTE(review): these underscore-prefixed attributes are Keras
        # internals; their availability may depend on the installed version.
        inputs = [self.model._feed_inputs,
                  self.model._feed_targets,
                  self.model._feed_sample_weights,
                  K.learning_phase()]

        # Get gradients of all the relevant layers at once
        grads = self.model.optimizer.get_gradients(self.model.total_loss, self.layerweights)
        self.get_gradients = K.function(inputs=inputs, outputs=grads)

        # Get cross-entropy loss
        self.get_loss = K.function(inputs=inputs, outputs=[self.model.total_loss, ])

    def on_epoch_begin(self, epoch, logs=None):
        """Decide whether this epoch is logged and reset the accumulators.

        Args:
            epoch: Epoch index, passed to ``do_save_func``.
            logs: Unused; accepted for compatibility with the Keras callback
                interface.
        """
        if self.do_save_func is not None and not self.do_save_func(epoch):
            # Don't log this epoch
            self._log_gradients = False
        else:
            # We will log this epoch. For each batch in this epoch, we will save
            # the gradients (in on_batch_begin). We will then compute means and
            # vars of these gradients
            self._log_gradients = True
            self._batch_weightnorm = []

            # One accumulator per *tracked* layer. The gradients returned by
            # get_gradients are ordered like self.layerweights / self.layerixs,
            # so the accumulator list must have exactly that length (sizing it
            # from model.layers[1:] breaks when the first layer has a kernel).
            self._batch_gradients = [[] for _ in self.layerixs]

            # Indexes of all the training data samples. These are shuffled and
            # read-in in chunks of SGD_BATCHSIZE
            ixs = list(range(len(self.trn.X)))
            np.random.shuffle(ixs)
            self._batch_todo_ixs = ixs

    def on_batch_begin(self, batch, logs=None):
        """Record the gradients of the tracked kernels on one sampled batch.

        The batch is taken from this callback's own shuffled index list; the
        ``batch`` argument is not used to select samples.

        Args:
            batch: Unused.
            logs: Unused; accepted for compatibility with the Keras callback
                interface.
        """
        if not self._log_gradients:
            # We are not keeping track of batch gradients, so do nothing
            return

        # Sample a batch
        batchsize = self.cfg['SGD_BATCHSIZE']
        cur_ixs = self._batch_todo_ixs[:batchsize]
        # Advance the indexing, so next on_batch_begin samples a different batch
        self._batch_todo_ixs = self._batch_todo_ixs[batchsize:]

        # Get gradients for this batch
        x, y, weights = self.model._standardize_user_data(self.trn.X[cur_ixs, :], self.trn.Y[cur_ixs, :])
        inputs = [x, y, weights, 1]  # 1 indicates training phase
        for lndx, g in enumerate(self.get_gradients(inputs)):
            # g is gradients for weights of lndx's layer
            oneDgrad = np.reshape(g, [-1, 1])  # Flatten to one dimensional vector
            self._batch_gradients[lndx].append(oneDgrad)

    def on_epoch_end(self, epoch, logs=None):
        """Compute the epoch's statistics and pickle them to ``SAVE_DIR``.

        Does nothing when ``do_save_func`` rejects the epoch.

        Args:
            epoch: Epoch index; used in the output file name and stored in the
                pickled dictionary.
            logs: Unused; accepted for compatibility with the Keras callback
                interface.
        """
        if self.do_save_func is not None and not self.do_save_func(epoch):
            # Don't log this epoch
            return

        # Get overall performance
        loss = {}
        for cdata, cdataname, istrain in ((self.trn, 'trn', 1), (self.tst, 'tst', 0)):
            x, y, weights = self.model._standardize_user_data(cdata.X, cdata.Y)
            loss[cdataname] = self.get_loss([x, y, weights, istrain])[0].flat[0]

        data = {
            'weights_norm': [],  # L2 norm of weights
            'gradmean': [],      # Mean of gradients
            'gradstd': [],       # Std of gradients
            'activity_tst': [],  # Activity in each layer for test set
        }

        for lndx, layerix in enumerate(self.layerixs):
            clayer = self.model.layers[layerix]

            data['weights_norm'].append(np.linalg.norm(K.get_value(clayer.kernel)))

            # Stack the flattened per-batch gradients along axis 1 so that
            # mean/std over that axis are taken across batches, per weight.
            stackedgrads = np.stack(self._batch_gradients[lndx], axis=1)
            data['gradmean'].append(np.linalg.norm(stackedgrads.mean(axis=1)))
            data['gradstd'].append(np.linalg.norm(stackedgrads.std(axis=1)))

            if self.cfg['FULL_MI']:
                data['activity_tst'].append(self.layerfuncs[lndx]([self.full.X, ])[0])
            else:
                data['activity_tst'].append(self.layerfuncs[lndx]([self.tst.X, ])[0])

        fname = self.cfg['SAVE_DIR'] + "/epoch%08d" % epoch
        print("Saving", fname)
        with open(fname, 'wb') as f:
            cPickle.dump({'ACTIVATION': self.cfg['ACTIVATION'], 'epoch': epoch, 'data': data, 'loss': loss},
                         f, cPickle.HIGHEST_PROTOCOL)