# callbacks.py
import numpy as np
import time
from datetime import datetime
import csv
import os
np.set_printoptions(formatter={"float": lambda x: "{0:0.2f}".format(x)})

# Example of the callback structure: each factory returns a closure that
# receives the shared train state and returns it (possibly modified).
def example(param="value"):
    """
    This is just a model of a callback.
    """
    def example_cb(state, logger, model, optimizer, disabled_pipes):
        return state
    return example_cb
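
# A minimal sketch (the loop itself is hypothetical) of how a train loop
# might apply these callbacks: every factory above returns a closure, and
# the loop threads the shared `state` dict through each closure in order.
def _run_callbacks(callbacks, state, logger, model, optimizer, disabled_pipes):
    for cb in callbacks:
        state = cb(state, logger, model, optimizer, disabled_pipes)
    return state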

# The real callbacks

# iteration (epoch) callbacks
def print_scores_on_epoch(validation=True):
    """
    Write the scores/loss to the log file.
    :param validation: when False, validation scores aren't printed
    """
    def print_scores_cb(state, logger, model, optimizer, disabled_pipes):
        e = state["i"] + 1
        ner = state["history"]["ner"][-1]
        f_score = state["history"]["f_score"][-1]
        precision_score = state["history"]["precision"][-1]
        batches = state["history"]["batches"][-1]
        logger.info("......................................................................")
        logger.info(f" Epoch N° {e}/{state['epochs']} | batches processed: {batches}")
        logger.info(f"Scores: NER loss: {ner}, f1-score: {f_score}, precision: {precision_score}")
        # skip the validation scores when validation is disabled
        if validation:
            val_f_score = state["history"]["val_f_score"][-1]
            val_precision_score = state["history"]["val_precision"][-1]
            logger.info(f"Validation scores: f1-score: {val_f_score}, precision: {val_precision_score}")
        return state
    return print_scores_cb
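
# A hedged sketch of the minimal `state` shape that print_scores_cb reads.
# The real train loop tracks more keys; only the ones used above are shown,
# and the numbers are made up.
def _example_state():
    return {
        "i": 0,  # current epoch index
        "epochs": 20,
        "history": {
            "ner": [12.3],  # one entry per finished epoch
            "f_score": [71.0],
            "precision": [69.5],
            "batches": [120],
            "val_f_score": [68.2],
            "val_precision": [66.9],
        },
    }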

def save_best_model(path_best_model="", threshold=40, score="val_f_score", mode="max", test=False):
    """
    Save the model if the epoch score passes the threshold
    and the current score is a new best.
    :param path_best_model: where to save the model. This callback adds this path to the history when it saves
    :param threshold: value to reach in order to save the first time
    :param score: score to be considered
    :param mode: allows scores that should be minimized, like the "ner" loss, to act as the trigger.
        Possible values: "min" or "max"
    :param test: boolean used to trigger a score evaluation with the test dataset
    """
    def save_best_model_cb(state, logger, model, optimizer, disabled_pipes):
        save = False
        if mode == "max":
            save = (state["history"][score][-1] >= threshold) and (state["history"][score][-1] > state["max_" + score])
        elif mode == "min":
            save = (state["history"][score][-1] <= threshold) and (state["history"][score][-1] < state["min_" + score])
        if save:
            e = state["i"] + 1
            if test:
                # flag the train loop to run an evaluation on the test dataset
                state["evaluate_test"] = True
            # re-enable the disabled pipes so the full pipeline is saved
            for pipe in disabled_pipes:
                model.add_pipe(pipe[1], before="ner")
            with model.use_params(optimizer.averages):
                model.to_disk(path_best_model)
                print("Saving model with the following pipes", model.pipe_names)
                logger.info(f"💾 Saving model for epoch {e}")
                state["history"]["saved"][state["i"]] = path_best_model
            # disable the other pipes again
            pipe_exceptions = ["ner"]
            other_pipes = [pipe for pipe in model.pipe_names if pipe not in pipe_exceptions]
            disabled_pipes = model.disable_pipes(*other_pipes)
            # since model is a reference to nlp, make sure training continues with the ner pipe only
            assert (
                len(model.pipe_names) == 1 and model.pipe_names[0] == "ner"
            ), "Model must be trained only with the ner pipe"
        return state
    return save_best_model_cb
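
# Usage sketch (the paths and thresholds are hypothetical): with the default
# mode="max" the model is saved once the score clears the threshold AND beats
# the best so far; mode="min" flips both comparisons, e.g. to trigger on the
# "ner" loss instead.
def _example_save_callbacks():
    by_score = save_best_model(path_best_model="models/best", threshold=70, score="val_f_score")
    by_loss = save_best_model(path_best_model="models/best_loss", threshold=10, score="ner", mode="min")
    return [by_score, by_loss]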

def reduce_lr_on_plateau(step=0.001, epochs=4, diff=1, score="val_f_score", last_chance=True):
    """
    When the model is not improving (plateau or decrease) over the selected
    number of last epochs, this callback decreases the learning rate by a
    fixed step.
    Note: uses the average of the last score differences.
    :param step: fixed amount to decrease the learning rate
    :param epochs: number of last epochs to be considered
    :param diff: mean score improvement below which the learning rate is reduced
    :param score: score used to calculate the diff
    :param last_chance: gives an extra opportunity if the last epoch has a positive diff
    """
    def reduce_lr_on_plateau_cb(state, logger, model, optimizer, disabled_pipes):
        if len(state["history"][score]) > epochs and state["lr"] > step:
            delta = np.diff(state["history"][score])[-epochs:]
            if np.mean(delta) < diff:
                # scores may have been bad, but the last epoch shed a glimmer of hope
                if last_chance and delta[-1] > 0:
                    logger.info("[reduce_lr_on_plateau] Positive rate 🛫! Waiting a bit more before touching the learning rate")
                else:
                    state["lr"] -= step
                    state["epochs"] += 1  # CHECK! we add 1 epoch for each decrement of the LR
                    logger.info(
                        f"[reduce_lr_on_plateau] Not learning, so reducing the learning rate to {state['lr']} and raising epochs to {state['epochs']}"
                    )
        return state
    return reduce_lr_on_plateau_cb
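
# A worked example of the trigger math above (made-up scores), assuming
# epochs=4 and diff=1: np.diff turns the score history into per-epoch
# changes, and the mean of the last 4 changes decides whether to reduce.
def _plateau_demo():
    history = [60.0, 64.0, 64.5, 64.4, 64.6, 64.5]
    delta = np.diff(history)[-4:]  # array([ 0.5, -0.1,  0.2, -0.1])
    return np.mean(delta) < 1  # True -> the learning rate would be reduced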

def early_stop(epochs=10, score="val_f_score", diff=5, last_chance=True):
    """
    Sets the stop flag to True in state if the score is not improving over the last epochs.
    Note: uses the sum of the last score differences.
    :param epochs: number of last epochs to be considered
    :param diff: total score improvement below which training stops
    :param score: score used to calculate the diff
    :param last_chance: gives an extra opportunity if the last epoch has a positive diff
    """
    def early_stop_cb(state, logger, model, optimizer, disabled_pipes):
        if len(state["history"][score]) > epochs:
            delta = np.diff(state["history"][score])[-epochs:]
            print(delta, " - sum of diffs: ", np.sum(delta))
            if np.sum(delta) < diff:
                # scores may have been bad, but the last epoch shed a glimmer of hope
                if last_chance and delta[-1] > 0:
                    logger.info("[early_stop] Positive rate 🛫! One more chance")
                else:
                    state["stop"] = True
                    logger.info("[early_stop] Not learning what I want 😭😭. Bye bye, adieu!")
        return state
    return early_stop_cb
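
# Contrast with reduce_lr_on_plateau: here the *sum* of the last `epochs`
# diffs is compared against `diff`, i.e. the total improvement over the
# window. A made-up example with the defaults epochs=10 and diff=5:
def _early_stop_demo():
    history = [60.0 + 0.3 * k for k in range(11)]  # ~0.3 gain per epoch
    delta = np.diff(history)[-10:]
    return np.sum(delta) < 5  # True (total gain ~3): stop, unless last_chance spares it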

def update_best_scores(validation=True):
    """
    Update the max/min scores in state based on history.
    :param validation: when False, validation scores are excluded
    """
    def update_best_scores_cb(state, logger, model, optimizer, disabled_pipes):
        # max and min
        state["min_ner"] = min(state["history"]["ner"])
        state["max_f_score"] = max(state["history"]["f_score"])
        state["max_recall"] = max(state["history"]["recall"])
        state["max_precision"] = max(state["history"]["precision"])
        if validation:
            state["max_val_f_score"] = max(state["history"]["val_f_score"])
            state["max_val_recall"] = max(state["history"]["val_recall"])
            state["max_val_precision"] = max(state["history"]["val_precision"])
        return state
    return update_best_scores_cb
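
# Ordering note: save_best_model compares the newest score against
# state["max_" + score] with a strict >, so if update_best_scores ran first
# the newest score would already equal the stored max and the save would
# never trigger. A hedged per-epoch ordering (hypothetical path) that keeps
# both working:
def _example_epoch_callbacks():
    return [print_scores_on_epoch(), save_best_model(path_best_model="models/best"), update_best_scores()]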

# batch plugins
def sleep(secs=0.5, log=False):
    """
    Sleep the train loop process for some seconds.
    This is experimental, but it has improved loop performance
    in some cases.
    """
    def sleep_cb(state, logger, model, optimizer, disabled_pipes):
        if log:
            logger.info(f"😴😴😴 sleeping for {secs} secs")
        time.sleep(secs)
        return state
    return sleep_cb

def change_dropout_fixed(step=0.01, until=0.5):
    """
    [experimental] Change the dropout on each epoch.
    :param step: amount to change per epoch (may be negative)
    :param until: limit for the dropout change
    """
    def change_dropout_fixed_cb(state, logger, model, optimizer, disabled_pipes):
        if step > 0 and state["dropout"] < until:
            state["dropout"] += step
            logger.info(f"[change_dropout_fixed] touching dropout. New value {state['dropout']}")
        elif step < 0 and state["dropout"] > until:
            # negative step: walk the dropout down instead
            state["dropout"] += step
            logger.info(f"[change_dropout_fixed] touching dropout. New value {state['dropout']}")
        else:
            logger.info("[change_dropout_fixed] No more room for touching dropout")
        return state
    return change_dropout_fixed_cb
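
# Usage sketch: a positive step anneals dropout upwards toward `until`, while
# a negative step with a lower `until` walks it back down. The values here
# are illustrative only.
def _example_dropout_callbacks():
    warm_up = change_dropout_fixed(step=0.01, until=0.5)     # +0.01 per epoch, capped at 0.5
    cool_down = change_dropout_fixed(step=-0.01, until=0.2)  # -0.01 per epoch, floored at 0.2
    return [warm_up, cool_down]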

# on stop plugins
def log_best_scores(validation=True):
    """
    Logs the max/min scores from state.
    :param validation: when False, validation scores are excluded
    """
    def log_best_scores_cb(state, logger, model, optimizer, disabled_pipes):
        logger.info("\n\n")
        logger.info("-------🏆-BEST-SCORES-🏅----------")
        e = state["i"]
        logger.info(f"using a dataset of length {state['train_size']} in {e}/{state['epochs']} epochs")
        logger.info(f"elapsed time: {state['elapsed_time']} minutes")
        logger.info(f"NER loss -> min {state['min_ner']}")
        # scores
        if validation:
            logger.info(f"RECALL -> max {state['max_recall']} | validation max {state['max_val_recall']}")
            logger.info(f"PRECISION -> max {state['max_precision']} | validation max {state['max_val_precision']}")
            logger.info(f"F-SCORE -> max {state['max_f_score']} | validation max {state['max_val_f_score']}")
        else:
            logger.info(f"RECALL -> max {state['max_recall']}")
            logger.info(f"PRECISION -> max {state['max_precision']}")
            logger.info(f"F-SCORE -> max {state['max_f_score']}")
        return state
    return log_best_scores_cb

def save_csv_history(filename="history.csv", session="", validation=True):
    """
    Save the history values to a csv file.
    :param filename: file where to write the csv rows
    :param session: session id. If blank, a date string is generated on each call
    :param validation: when False, validation scores are excluded
    """
    def save_csv_history_cb(state, logger, model, optimizer, disabled_pipes):
        path = f"history/{filename}"
        logger.info("\n\n")
        logger.info(f"[save_csv_history] 💾 Saving history in the {path} file")
        # make sure the history directory exists before writing
        os.makedirs("history", exist_ok=True)
        # generate a session name if not given
        if session == "":
            now = datetime.now()
            s = now.strftime("%Y%m%d%H%M%S")
        else:
            s = session
        header = [
            "session",
            "epoch",
            "batches",
            "lr",
            "dropout",
            "ner",
            "f_score",
            "recall",
            "precision",
            "per_type_score",
            "val_f_score",
            "val_recall",
            "val_precision",
            "val_per_type_score",
        ]
        rows = []
        # prepare the rows
        logger.info("\n[save_csv_history] preparing rows ...")
        for i in range(len(state["history"]["ner"])):
            if validation:
                val_f_score = state["history"]["val_f_score"][i]
                val_recall = state["history"]["val_recall"][i]
                val_precision = state["history"]["val_precision"][i]
                val_per_type_score = state["history"]["val_per_type_score"][i]
            else:
                val_f_score, val_recall, val_precision, val_per_type_score = None, None, None, None
            rows.append(
                {
                    "session": s,
                    "epoch": i + 1,
                    "batches": state["history"]["batches"][i],
                    "lr": state["history"]["lr"][i],
                    "dropout": state["history"]["dropout"][i],
                    "ner": state["history"]["ner"][i],
                    "f_score": state["history"]["f_score"][i],
                    "recall": state["history"]["recall"][i],
                    "precision": state["history"]["precision"][i],
                    "per_type_score": state["history"]["per_type_score"][i],
                    "val_f_score": val_f_score,
                    "val_recall": val_recall,
                    "val_precision": val_precision,
                    "val_per_type_score": val_per_type_score,
                }
            )
        # write the csv file, overwriting any previous history
        with open(path, "w", newline="") as file:
            writer = csv.DictWriter(file, fieldnames=header)
            writer.writeheader()
            writer.writerows(rows)
        logger.info(f"[save_csv_history] 💾 saved!! {path} file")
        return state
    return save_csv_history_cb
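
# A hedged sketch for reading the history back (e.g. to plot or compare
# sessions); it relies only on the header written by save_csv_history.
def _load_csv_history(path="history/history.csv"):
    with open(path, newline="") as file:
        return list(csv.DictReader(file))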