sub1 : baseline1 AUC : 0.7147
-------------------------------------------------------------------------------------------------------------
sub2 : baseline3 AUC : 0.8100
-------------------------------------------------------------------------------------------------------------
sub3 : xgboost (same feature engineering as baseline3, only lgb swapped for xgboost) AUC : 0.8078 (see the sketch after the params below)
model_params = {'max_depth': 3,
                'eta': 0.1,
                'objective': 'binary:logistic',
                'eval_metric': 'logloss'}
# early stopping (100 rounds) belongs to xgb.train, not the params dict
num_rounds = 400
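
A minimal sketch of how these parameters map onto xgboost's native API; the data names (X_train, y_train, X_valid, y_valid) and the single validation split are assumptions, not from this log:

import xgboost as xgb

dtrain = xgb.DMatrix(X_train, label=y_train)  # assumed train split
dvalid = xgb.DMatrix(X_valid, label=y_valid)  # assumed validation split
model = xgb.train(model_params, dtrain,
                  num_boost_round=num_rounds,
                  evals=[(dvalid, 'valid')],
                  early_stopping_rounds=100)  # passed here, not inside model_params
valid_pred = model.predict(dvalid)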
-------------------------------------------------------------------------------------------------------------
sub4 : baseline3 lgb with month changed to 3 (probably a misguided idea) AUC : 0.8059
-------------------------------------------------------------------------------------------------------------
sub5 : baseline3 + groupby cumsum Aggregation Feature Generation (the feature engineering from lecture 7) AUC : 0.8371. Just adding a few variables produced a huge gain (sketch after the metrics below).
train = {
Mean AUC = 0.780376817468989
OOF AUC = 0.7694190862487825
Precision: 0.79268
Recall: 0.16414
F1 Score: 0.27197
ROC AUC Score: 0.80211
}
test = {
Mean AUC = 0.8072435497622622
OOF AUC = 0.7910998679635645
}
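
A minimal sketch of the kind of groupby cumsum aggregation this refers to; df and the column names (customer_id, amount) are hypothetical placeholders:

import pandas as pd

# running total of a numeric column within each group, in row order
df['amount_cumsum'] = df.groupby('customer_id')['amount'].cumsum()

# plain groupby aggregations merged back as per-row features
agg = (df.groupby('customer_id')['amount']
         .agg(['mean', 'max', 'min', 'sum'])
         .add_prefix('amount_')
         .reset_index())
df = df.merge(agg, on='customer_id', how='left')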
-------------------------------------------------------------------------------------------------------------
sub6 : sub5 feature engineering code + Time-Series diff Feature Generation AUC : 0.8551 (sketch after the metrics below)
model_params = {
    'objective': 'binary',     # binary classification
    'boosting_type': 'gbdt',
    'metric': 'auc',           # evaluation metric
    'feature_fraction': 0.8,   # feature sampling ratio
    'bagging_fraction': 0.8,   # row sampling ratio
    'bagging_freq': 1,
    'n_estimators': 10000,     # number of trees
    'early_stopping_rounds': 100,
    'seed': SEED,
    'verbose': -1,
    'n_jobs': -1,
}
train = {
Mean AUC = 0.7916785778782817
OOF AUC = 0.76942850881091
Precision: 0.79775
Recall: 0.17929
F1 Score: 0.29278
ROC AUC Score: 0.81599
}
test = {
Mean AUC = 0.8224887907043383
OOF AUC = 0.8096290615313552
}
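
A minimal sketch of a Time-Series diff feature of this kind; customer_id, date, and amount are hypothetical column names:

# sort by time within each group, then difference against earlier rows
df = df.sort_values(['customer_id', 'date'])
df['amount_diff_1'] = df.groupby('customer_id')['amount'].diff(1)
df['amount_diff_3'] = df.groupby('customer_id')['amount'].diff(3)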
-------------------------------------------------------------------------------------------------------------
sub7 : sub6 feature engineering, with catboost AUC : 0.8546 (slightly below lightGBM; fitting sketch after the metrics below)
cat_params = {
    'n_estimators': 10000,       # number of trees
    'learning_rate': 0.07,       # learning rate
    'eval_metric': 'AUC',        # evaluation metric
    'loss_function': 'Logloss',  # loss function
    'random_seed': SEED,
    'metric_period': 100,
    'od_wait': 100,              # early stopping rounds
    'depth': 6,                  # maximum tree depth
    'rsm': 0.8,                  # feature sampling ratio
}
train = {
Mean AUC = 0.7968459456203606
OOF AUC = 0.7887850106346991
Precision: 0.76899
Recall: 0.20455
F1 Score: 0.32314
ROC AUC Score: 0.81828
}
test = {
Mean AUC = 0.8283969069858208
OOF AUC = 0.8269154937108179
}
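
A minimal sketch of fitting CatBoost with these cat_params; the data names and the single validation split are assumptions:

from catboost import CatBoostClassifier

model = CatBoostClassifier(**cat_params)
model.fit(X_train, y_train,
          eval_set=(X_valid, y_valid),  # od_wait early-stops on this set
          use_best_model=True)
valid_pred = model.predict_proba(X_valid)[:, 1]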
-------------------------------------------------------------------------------------------------------------
sub8 : sub6 + Group By nunique Aggregation Feature Generation AUC : 0.8491 (sketch below)
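
A minimal sketch of a Group By nunique feature; df and the column names are hypothetical:

# number of distinct values per group, merged back as a per-row feature
nuniq = (df.groupby('customer_id')['product_id']
           .nunique()
           .rename('product_id_nunique')
           .reset_index())
df = df.merge(nuniq, on='customer_id', how='left')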
-------------------------------------------------------------------------------------------------------------
sub9 : sub6 + hyperparameter tuning AUC : 0.8567 (tuning sketch after the metrics below)
** study.optimize(objective, n_trials=10)
** study.best_value : 0.820356743437908
tuned_lgb_params = {
    'objective': 'binary',     # binary classification
    'boosting_type': 'gbdt',
    'metric': 'auc',           # evaluation metric
    'feature_fraction': 0.4037902763824288,   # feature sampling ratio
    'bagging_fraction': 0.44651205628348084,  # row sampling ratio
    'bagging_freq': 4,
    'n_estimators': 10000,     # number of trees
    'early_stopping_rounds': 100,
    'seed': SEED,
    'verbose': -1,
    'n_jobs': -1,
    'num_leaves': 43,
    'max_bin': 140,
    'min_data_in_leaf': 36,
    'lambda_l1': 1.6722093110936513,
    'lambda_l2': 2.1262853662727266,
}
study.best_params = {'num_leaves': 43,
'max_bin': 140,
'min_data_in_leaf': 36,
'feature_fraction': 0.4037902763824288,
'bagging_fraction': 0.44651205628348084,
'bagging_freq': 4,
'lambda_l1': 1.6722093110936513,
'lambda_l2': 2.1262853662727266 }
train = {
Mean AUC = 0.7882659790357969
OOF AUC = 0.7728813543548756
Precision: 0.81356
Recall: 0.16162
F1 Score: 0.26966
ROC AUC Score: 0.81525
}
test = {
Mean AUC = 0.8217534686252529
OOF AUC = 0.8204701774397406
}
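
study.optimize / study.best_params match Optuna's API, so the tuning presumably looked like the sketch below; the search ranges are illustrative guesses, only the parameter names come from this log, and X_train/y_train/X_valid/y_valid are assumed:

import optuna
import lightgbm as lgb

def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'auc',
        'verbose': -1,
        'seed': SEED,
        'num_leaves': trial.suggest_int('num_leaves', 4, 256),
        'max_bin': trial.suggest_int('max_bin', 100, 255),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 5, 100),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
    }
    dtrain = lgb.Dataset(X_train, label=y_train)  # assumed data split
    dvalid = lgb.Dataset(X_valid, label=y_valid)
    booster = lgb.train(params, dtrain, num_boost_round=10000,
                        valid_sets=[dvalid],
                        callbacks=[lgb.early_stopping(100)])
    return booster.best_score['valid_0']['auc']

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)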
-------------------------------------------------------------------------------------------------------------
sub10 : sub6 + hyperparameter tuning AUC : 0.8539. Why is this lower than the 10-trial study.optimize run? (best_value rose from 0.8204 to 0.8307 on CV, yet the leaderboard AUC dropped, which suggests the 30-trial search overfit the validation folds.)
** study.optimize(objective, n_trials=30)
** study.best_value : 0.8307479292260942
study.best_params : {'num_leaves': 4,
'max_bin': 165,
'min_data_in_leaf': 10,
'feature_fraction': 0.6008908917624343,
'bagging_fraction': 0.9881125352331713,
'bagging_freq': 2,
'lambda_l1': 4.453621881942934e-08,
'lambda_l2': 9.958159639309724}
tuned_lgb_params = {
    'objective': 'binary',     # binary classification
    'boosting_type': 'gbdt',
    'metric': 'auc',           # evaluation metric
    'feature_fraction': 0.6008908917624343,  # feature sampling ratio
    'bagging_fraction': 0.9881125352331713,  # row sampling ratio
    'bagging_freq': 2,
    'n_estimators': 10000,     # number of trees
    'early_stopping_rounds': 100,
    'seed': SEED,
    'verbose': -1,
    'n_jobs': -1,
    'num_leaves': 4,
    'max_bin': 165,
    'min_data_in_leaf': 10,
    'lambda_l1': 4.453621881942934e-08,
    'lambda_l2': 9.958159639309724,
}
train = {
Mean AUC = 0.8019958574779184
OOF AUC = 0.7977547081402095
Precision: 0.80936
Recall: 0.20370
F1 Score: 0.32549
ROC AUC Score: 0.81988
}
test = {
Mean AUC = 0.8314933035468216
OOF AUC = 0.8305975502711276
}
-------------------------------------------------------------------------------------------------------------
sub11 : cat_boost + hyperparameter tuning AUC : 0.8465
** study.best_value : 0.8192443104772174
** n_trials = 10
{'iterations': 180,
'depth': 5,
'learning_rate': 0.06173991466879898,
'random_strength': 65,
'bagging_temperature': 0.43432569253026015,
'od_type': 'Iter'}
tuned_cat_params = {
    # 'n_estimators': 10000,   # number of trees (superseded by 'iterations' below)
    'learning_rate': 0.06173991466879898,  # learning rate
    'eval_metric': 'AUC',        # evaluation metric
    'loss_function': 'Logloss',  # loss function
    'random_seed': SEED,
    'metric_period': 100,
    'od_wait': 100,              # early stopping rounds
    'depth': 5,                  # maximum tree depth
    'rsm': 0.8,                  # feature sampling ratio
    'iterations': 180,
    'random_strength': 65,
    'bagging_temperature': 0.43432569253026015,
    'od_type': 'Iter'
}
train = {
Mean AUC = 0.7895264088380137
OOF AUC = 0.7855971601095717
Precision: 0.78146
Recall: 0.19865
F1 Score: 0.31678
ROC AUC Score: 0.81352
}
test = {
Mean AUC = 0.8188936021820468
OOF AUC = 0.8185367867767515
}
-------------------------------------------------------------------------------------------------------------
sub12 : sub6 with var(), sem() added to the aggregation functions AUC : 0.8551 (no difference from leaving them out; sketch after the metrics below)
train = {
Mean AUC = 0.7956256856392856
OOF AUC = 0.7841413160969467
Precision: 0.86957
Recall: 0.15152
F1 Score: 0.25806
ROC AUC Score: 0.81728
}
test = {
Mean AUC = 0.8203748659559811
OOF AUC = 0.8127671547113542
}
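
A minimal sketch of extending the aggregation list; pandas accepts 'var' and 'sem' as string aggregations, and df/columns are hypothetical:

agg_funcs = ['mean', 'max', 'min', 'sum', 'skew', 'var', 'sem']
agg = (df.groupby('customer_id')['amount']
         .agg(agg_funcs)
         .add_prefix('amount_')
         .reset_index())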
-------------------------------------------------------------------------------------------------------------
sub13 : only the total variables from the feature engineering AUC : 0.8537
uses sub9's tuned_params
train = {
Mean AUC = 0.8009248501076534
OOF AUC = 0.795723157214402
Precision: 0.83521
Recall: 0.18771
F1 Score: 0.30653
ROC AUC Score: 0.81879
}
test = {
Mean AUC = 0.8292915947198949
OOF AUC = 0.828485561392487
}
-------------------------------------------------------------------------------------------------------------
sub14 : nunique from feature engineering2 + hyperparameter optimization AUC : 0.8532
train = {
Mean AUC = 0.801311056114962
OOF AUC = 0.7973973487469388
Precision: 0.79479
Recall: 0.20539
F1 Score: 0.32642
ROC AUC Score: 0.81991
}
test = {
Mean AUC = 0.8292356730898416
OOF AUC = 0.8283959838051147
}
tuned_lgb_params = {
    'objective': 'binary',     # binary classification
    'boosting_type': 'gbdt',
    'metric': 'auc',           # evaluation metric
    'feature_fraction': 0.9896870723092334,  # feature sampling ratio
    'bagging_fraction': 0.7788996383625406,  # row sampling ratio
    'bagging_freq': 6,
    'n_estimators': 10000,     # number of trees
    'early_stopping_rounds': 100,
    'seed': SEED,
    'verbose': -1,
    'n_jobs': -1,
    'num_leaves': 4,
    'max_bin': 207,
    'min_data_in_leaf': 22,
    'lambda_l1': 0.001327322274659488,
    'lambda_l2': 0.0002049615209545024,
}
-------------------------------------------------------------------------------------------------------------
sub15 : only mean, max, min, sum, skew used in agg_func AUC : 0.8536
no hyperparameter tuning; submitted because the test AUC rose meaningfully
train = {
Mean AUC = 0.7941014295202646
OOF AUC = 0.7617751781329554
Precision: 0.83645
Recall: 0.15067
F1 Score: 0.25535
ROC AUC Score: 0.81438
}
test = {
Mean AUC = 0.8302864572067601
OOF AUC = 0.8292445109824906
}
-------------------------------------------------------------------------------------------------------------
sub16 : sub6 + quantile_transform applied to the float64 cols (sketch after the metrics below)
train = {
Mean AUC = 0.7964314263024259
OOF AUC = 0.7891707540422792
Precision: 0.71676
Recall: 0.20875
F1 Score: 0.32334
ROC AUC Score: 0.81585
}
test = {
Mean AUC = 0.8291012746273079
OOF AUC = 0.8287029610915804
}
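
Assuming quantile_transform refers to sklearn's QuantileTransformer, a minimal sketch of applying it to the float64 columns only, fit on train to avoid leakage (train_df/test_df are assumed names):

from sklearn.preprocessing import QuantileTransformer

float_cols = train_df.select_dtypes('float64').columns
qt = QuantileTransformer(output_distribution='normal', random_state=SEED)
train_df[float_cols] = qt.fit_transform(train_df[float_cols])
test_df[float_cols] = qt.transform(test_df[float_cols])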
-------------------------------------------------------------------------------------------------------------
sub17 : qt + nunique + parameter tuning AUC : 0.8558
Before hyperparameter tuning:
train = {
Mean AUC = 0.7942708266272538
OOF AUC = 0.7776709241137092
Precision: 0.83663
Recall: 0.14226
F1 Score: 0.24317
ROC AUC Score: 0.81405
}
test = {
Mean AUC = 0.8239138128412447
OOF AUC = 0.8191932558937411
}
After tuning:
** study.best_params
{'num_leaves': 159,
'max_bin': 135,
'min_data_in_leaf': 28,
'feature_fraction': 0.9068781382922049,
'bagging_fraction': 0.5427572227580095,
'bagging_freq': 4,
'lambda_l1': 2.531979160239369,
'lambda_l2': 1.645404534997536e-05}
** study.best_value
0.8166303158032315
train = {
Mean AUC = 0.7897944159433815
OOF AUC = 0.7817825347110902
Precision: 0.79496
Recall: 0.18603
F1 Score: 0.30150
ROC AUC Score: 0.81514
}
test = {
Mean AUC = 0.8176173759994285
OOF AUC = 0.8108646752779968
}
-------------------------------------------------------------------------------------------------------------
sub18 : sub9 + sub11 ensemble AUC : 0.8544
-------------------------------------------------------------------------------------------------------------
sub19 : sub9 + sub17 ensemble AUC : 0.8572 (1st place)
Ensembled the two best-performing results. Surprisingly, even ensembling two lgb models lifts the score (sketch below).
I don't think I can find any more feature-engineering insights. This is really hard.
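
The simplest form of such an ensemble is an average of the two submissions' predicted probabilities; the file and column names below are hypothetical placeholders:

import pandas as pd

a = pd.read_csv('sub9.csv')
b = pd.read_csv('sub17.csv')
ens = a.copy()
ens['prediction'] = (a['prediction'] + b['prediction']) / 2
ens.to_csv('sub19_ensemble.csv', index=False)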