# -*- coding: utf-8 -*-
"""
Created on Mon Jan 11 15:31:42 2021
@author: sergey feldman
"""
from functools import partial

import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_validate, cross_val_score
from sklearn.metrics import get_scorer
from hyperopt import fmin, tpe, Trials, space_eval
from autogluon import TabularPrediction as task

from splitting_utils import get_cv
from lightgbm_models import make_lightgbm_model
from constants import N_JOBS, RANDOM_STATE, N_SPLITS, N_HYPEROPT_EVALS
from constants import HYPEROPT_LIGHTGBM_SPACE, N_HYPEROPT_RANDOM_START, AUTOGLUON_N_SEC


def get_default_scoring(learning_task):
    # this function defines the "default" scoring
    # that will be used to evaluate ML models
    if learning_task == "binary":
        return "roc_auc"
    elif learning_task == "multiclass":
        return "roc_auc_ovr_weighted"
    else:
        return "neg_mean_absolute_error"


def sklearn_pipeline_evaluator(
    X,
    y,
    pipeline,
    param_grid,
    groups=None,
    outer_cv=None,
    learning_task="regression",
    scoring=None,
    random_state=RANDOM_STATE,
    n_splits=N_SPLITS,
    n_jobs=N_JOBS,
):
    if scoring is None:
        scoring = get_default_scoring(learning_task)
    # see here for the available scoring metrics: https://scikit-learn.org/stable/modules/model_evaluation.html
    inner_cv = get_cv(learning_task, groups is not None, n_splits, random_state)
    if outer_cv is None:
        outer_cv = get_cv(learning_task, groups is not None, n_splits, random_state)
    # sklearn models can be tuned via grid search because they have few hyperparameters
    clf = GridSearchCV(
        estimator=pipeline,
        param_grid=param_grid,
        cv=inner_cv,
        scoring=scoring,
        n_jobs=n_jobs,
        refit=True,
    )
    # this does nested cross-validation for you!
    cv_results = cross_validate(
        clf, X=X, y=y, groups=groups, cv=outer_cv, scoring=scoring, return_estimator=True, n_jobs=n_jobs
    )
    return cv_results
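

# A minimal usage sketch (added for illustration, not in the original file):
# nested CV for a scaler + logistic-regression pipeline on a synthetic binary
# task. The pipeline and param_grid names below are assumptions for the demo.
def _example_sklearn_pipeline_evaluator():
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    X, y = make_classification(n_samples=200, n_features=10, random_state=RANDOM_STATE)
    pipeline = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression(max_iter=1000))])
    param_grid = {"clf__C": [0.1, 1.0, 10.0]}
    cv_results = sklearn_pipeline_evaluator(X, y, pipeline, param_grid, learning_task="binary")
    # one ROC AUC per outer fold, plus the refit GridSearchCV objects
    print(cv_results["test_score"].mean(), cv_results["estimator"][0].best_params_)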


def lightgbm_hyperopt_evaluator(
    X,
    y,
    groups=None,
    outer_cv=None,
    param_space=HYPEROPT_LIGHTGBM_SPACE,
    learning_task="regression",
    scoring=None,
    lightgbm_objective=None,
    lightgbm_metric=None,
    random_state=RANDOM_STATE,
    n_splits=N_SPLITS,
    n_jobs=N_JOBS,
):
    # get scoring and CV set up
    if scoring is None:
        scoring = get_default_scoring(learning_task)
    scorer = get_scorer(scoring)  # a function that does the scoring
    if outer_cv is None:
        outer_cv = get_cv(learning_task, groups is not None, n_splits, random_state)
    if groups is None:
        outer_cv = outer_cv.split(X, y)
    else:
        outer_cv = outer_cv.split(X, y, groups=groups)
    # run the loop over the outer folds
    # we can't use sklearn's nice nested cross_validate approach here
    # because the inner search is hyperopt, not an sklearn meta-estimator
    nested_scores = []
    estimators = []
    for train_inds, test_inds in outer_cv:
        # define the lightgbm model
        lgb_model = make_lightgbm_model(
            learning_task,
            objective=lightgbm_objective,
            metric=lightgbm_metric,
            tree_learner="feature",  # best for small datasets
            random_state=random_state,
            n_jobs=n_jobs,
        )
        # split the data
        X_train, y_train = X[train_inds, :], y[train_inds]
        X_test, y_test = X[test_inds, :], y[test_inds]
        if groups is not None:
            groups_train = groups[train_inds]
        else:
            groups_train = None

        # this is the objective function that we minimize with hyperopt's fmin
        def obj(params):
            # set the parameters on the lightgbm model
            lgb_model.set_params(**params)
            # inner cross-validation
            inner_cv = get_cv(learning_task, groups is not None, n_splits, random_state)
            # get the inner cross-validation scores for this set of params
            scores = cross_val_score(
                lgb_model, X=X_train, y=y_train, groups=groups_train, cv=inner_cv, scoring=scoring, n_jobs=n_jobs
            )
            # sklearn scorers are higher-is-better, but fmin minimizes, so negate the mean score
            return -np.mean(scores)

        # this is how hyperopt works: you give fmin an objective and a parameter space
        trials = Trials()
        _ = fmin(
            fn=obj,
            space=param_space,
            algo=partial(tpe.suggest, n_startup_jobs=N_HYPEROPT_RANDOM_START),
            max_evals=N_HYPEROPT_EVALS,
            trials=trials,
            rstate=np.random.RandomState(random_state),
            show_progressbar=False,
            verbose=False,
        )
        # these are the best parameters hyperopt found
        # hyperopt has some warts (trials.argmin stores indices for hp.choice),
        # so we need space_eval to map them back to actual parameter values
        best_params = space_eval(param_space, trials.argmin)
        # set the lgb_model to have these parameters and train on the entire training fold
        lgb_model.set_params(**best_params)
        lgb_model.fit(X_train, y_train)
        # now we can evaluate on the test fold
        score = scorer(lgb_model, X_test, y_test)
        # store everything
        nested_scores.append(score)
        estimators.append(lgb_model)
    return {"test_score": np.array(nested_scores), "estimator": estimators}


def autogluon_evaluator(
    X,
    y,
    groups=None,
    outer_cv=None,
    learning_task="regression",
    scoring=None,
    autogluon_eval_metric=None,
    random_state=RANDOM_STATE,
    n_splits=N_SPLITS,
    n_jobs=N_JOBS,
):
    # get scoring and CV set up
    if scoring is None:
        scoring = get_default_scoring(learning_task)
    scorer = get_scorer(scoring)
    if outer_cv is None:
        outer_cv = get_cv(learning_task, groups is not None, n_splits, random_state)
    if groups is None:
        outer_cv = outer_cv.split(X, y)
    else:
        outer_cv = outer_cv.split(X, y, groups=groups)
    # autogluon's eval metrics have different names than sklearn's
    if autogluon_eval_metric is None:
        if learning_task == "binary":
            autogluon_eval_metric = "roc_auc"
        elif learning_task == "multiclass":
            # no multiclass roc_auc available
            autogluon_eval_metric = "f1_weighted"
        else:
            autogluon_eval_metric = "mean_absolute_error"
    # autogluon wants pandas dataframes
    data_df = pd.DataFrame(X)
    data_df["y"] = y
    # run the loop over the outer folds
    nested_scores = []
    estimators = []
    for train_inds, test_inds in outer_cv:
        # define train and test splits
        data_df_train = data_df.iloc[train_inds, :]
        data_df_test = data_df.iloc[test_inds, :]
        # define the autogluon model
        # there is no inner CV for autogluon:
        # it does its own internal optimization
        # for as long as you let it (set with time_limits)
        autogluon_model = task.fit(
            data_df_train,
            "y",
            time_limits=AUTOGLUON_N_SEC,
            presets="best_quality",
            eval_metric=autogluon_eval_metric,
            problem_type=learning_task,
            verbosity=0,
        )
        # now we can evaluate the autogluon model on the test set
        score = scorer(autogluon_model, data_df_test, data_df_test["y"])
        # store everything
        estimators.append(autogluon_model)
        nested_scores.append(score)
    return {"test_score": np.array(nested_scores), "estimator": estimators}