-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_model.py
105 lines (86 loc) · 3.08 KB
/
find_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
# Author: Vera Bernhard // minor changes by Polina Mashkovtseva
# Date: 21.06.2021 // 31.05.2023
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
import numpy as np
from stress_detector import StressDetector
# This file describes the steps of finding the best model
def get_best_classifiers(wav_path: str, features: list, classifier_names: list):
    """Benchmark a fixed set of candidate classifiers and print their scores.

    A ``StressDetector`` is created for ``wav_path`` and ``features``, the
    feature table is loaded from ``./data/complete_features.tsv``, and eight
    scikit-learn classifiers (mostly with default settings) are handed to
    ``StressDetector.test_classifiers``. The table that comes back is ordered
    with ``sort_values('f1')`` and printed under a "With Post-Processing"
    heading.

    Args:
        wav_path: Forwarded unchanged to the ``StressDetector`` constructor.
        features: Forwarded unchanged to the ``StressDetector`` constructor.
        classifier_names: Labels passed to ``test_classifiers`` together with
            the candidates. Presumably one label per candidate, in the same
            order as the list built below -- verify against
            ``StressDetector.test_classifiers``.

    Returns:
        None. The ranking is only printed.
    """
    detector = StressDetector(wav_path, features)
    detector.get_features('./data/complete_features.tsv')

    # Candidate algorithms, in the order expected to line up with
    # ``classifier_names``. A fixed random_state is set wherever the
    # estimator accepts one so that repeated runs are comparable.
    nearest_neighbours = KNeighborsClassifier(n_jobs=-1)
    logistic_regression = LogisticRegression()
    support_vectors = SVC(probability=True, random_state=42)
    decision_tree = DecisionTreeClassifier(random_state=42)
    random_forest = RandomForestClassifier(random_state=42, n_jobs=-1)
    neural_net = MLPClassifier(random_state=42)
    ada_boost = AdaBoostClassifier(random_state=42)
    naive_bayes = GaussianNB()

    candidates = [
        nearest_neighbours,
        logistic_regression,
        support_vectors,
        decision_tree,
        random_forest,
        neural_net,
        ada_boost,
        naive_bayes,
    ]

    # Evaluation run labelled "with post-processing" by the authors.
    # NOTE(review): the result is presumably a pandas DataFrame with an
    # 'f1' column; sort_values uses its default ordering here.
    raw_scores = detector.test_classifiers(candidates, classifier_names)
    results_post = raw_scores.sort_values('f1')
    print(f"With Post-Processing:\n {results_post}")

    # Outcome recorded by the authors:
    # ==> Best performing models: Nearest Neighbour, SVM, Random Forest, Neural Net
def train_best_model(wav_path, features):
    """Build the soft-voting ensemble, train and save it, then print scores.

    Four tuned estimators (multi-layer perceptron, k-nearest neighbours,
    SVM and random forest) are combined in a ``VotingClassifier`` with
    ``voting='soft'``. The ensemble is passed to
    ``StressDetector.train_all`` under the name ``'vot'`` with ``save=True``
    and afterwards to ``StressDetector.train`` with ``matrix=True``; the mean
    of the returned ``'f1'`` and ``'accuracy'`` entries is printed.

    Args:
        wav_path: Forwarded unchanged to the ``StressDetector`` constructor.
        features: Forwarded to the ``StressDetector`` constructor and again
            to ``StressDetector.train``.

    Returns:
        None. The scores are only printed.
    """
    # All non-default hyperparameters below were found by grid search
    # (as stated by the original authors).
    neural_net = MLPClassifier(
        hidden_layer_sizes=(100, 50),
        activation='relu',
        solver='adam',
        alpha=0.0001,
        max_iter=300,
        random_state=42,
    )
    nearest_neighbours = KNeighborsClassifier(
        n_neighbors=3,
        weights='distance',
        metric='manhattan',
        algorithm='auto',
        n_jobs=-1,
    )
    # probability=True is set explicitly on the SVM, which is one of the
    # members of the soft-voting ensemble.
    support_vectors = SVC(
        C=10.0,
        kernel='rbf',
        gamma='scale',
        class_weight=None,
        probability=True,
        random_state=42,
    )
    random_forest = RandomForestClassifier(
        n_estimators=200,
        criterion='entropy',
        max_depth=50,
        min_samples_split=5,
        class_weight='balanced',
        n_jobs=-1,
        random_state=42,
    )

    ensemble_members = [
        ('mlp', neural_net),
        ('nn', nearest_neighbours),
        ('svm', support_vectors),
        ('rf', random_forest),
    ]
    voting_ensemble = VotingClassifier(
        estimators=ensemble_members,
        voting='soft',
    )

    detector = StressDetector(wav_path, features)
    detector.get_features('./data/complete_features.tsv')

    # First pass: train_all with save=True (per the authors, trains on all
    # data and stores the model). Second pass: train(...) yields the
    # evaluation used for the printed scores.
    # NOTE(review): the same ensemble instance is handed to both calls;
    # confirm in StressDetector that the second call cannot affect the
    # model saved by the first.
    detector.train_all(voting_ensemble, 'vot', save=True)
    evaluation = detector.train(voting_ensemble, features, matrix=True)

    mean_f1 = np.mean(evaluation['f1'])
    print('F1 Score: {}'.format(mean_f1))
    mean_accuracy = np.mean(evaluation['accuracy'])
    print('Accuracy: {}'.format(mean_accuracy))