-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMain.m
153 lines (114 loc) · 5.52 KB
/
Main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
%% Main
% Pipeline entry point: load the raw heart-disease CSV, explore and clean
% it, then hold out a test set for the final evaluation section below.
clear; clc; close all;

% Fix the RNG seed so data splits and model runs are reproducible.
rng(42);

% Data Gathering: read the raw dataset from disk.
dataset = readtable('dataset/HeartDisease.csv');

% Data Exploration (returns the possibly-transformed table).
dataset = DataDiscovery.dataExploration(dataset);

% Data Preparation: derive engineered features, then clean the records.
dataset = DataPreparation.featureEngineering(dataset);
dataset = DataPreparation.dataCleaning(dataset);

% Split into training and held-out test partitions.
[trainingSet, testSet] = DataPreparation.trainTestSplit(dataset);
%% Cross Validation for Logistic Regression Models
% Shared settings, reused by the SVM/K-Means sections and final evaluation.
nFolds = 5;
iterations = 1000;
withRegularization = true;
numClusters = 2;

% Grid search over (Alpha, Lambda) with k-fold cross validation.
[bestHyperparamsLR, bestMetricsLR] = GridSearch.gridSearchLR(trainingSet, nFolds, iterations, withRegularization);

% Report the winning hyperparameters for each logistic-regression variant.
fprintf('\nBEST PERFORMANCES FOR LOGISTIC REGRESSION:\n\n');
fprintf('\nBest hyperparams:\n\n');
lrParamNames = keys(bestHyperparamsLR);
for idx = 1:numel(lrParamNames)
    name = lrParamNames{idx};
    params = bestHyperparamsLR(name);
    fprintf('Model: %s\n', name);
    fprintf('Alpha: %s\n', num2str(params('Alpha')));
    fprintf('Lambda: %s\n', num2str(params('Lambda')));
    disp('-----------------------------------------------');
end

% Report the best cross-validated recall for each variant.
fprintf('\nBest metrics:\n\n');
lrMetricNames = keys(bestMetricsLR);
for idx = 1:numel(lrMetricNames)
    name = lrMetricNames{idx};
    metricsMap = bestMetricsLR(name);
    fprintf('Model: %s\n', name);
    fprintf('Recall: %s\n', num2str(metricsMap('Recall')));
    disp('-----------------------------------------------');
end
%% Cross Validation for SVM Models
% Grid search over kernel choices with k-fold cross validation.
[bestHyperparamsSVM, bestMetricsSVM] = GridSearch.gridSearchSVM(trainingSet, nFolds);

% Report the winning kernel for each SVM variant.
fprintf('\nBEST PERFORMANCES FOR SUPPORT VECTOR MACHINE:\n\n');
fprintf('\nBest hyperparams:\n\n');
svmParamNames = keys(bestHyperparamsSVM);
for idx = 1:numel(svmParamNames)
    name = svmParamNames{idx};
    params = bestHyperparamsSVM(name);
    fprintf('Model: %s\n', name);
    fprintf('Kernel: %s\n', num2str(params('Kernel')));
    disp('-----------------------------------------------');
end

% Report the best cross-validated recall for each variant.
fprintf('\nBest metrics:\n\n');
svmMetricNames = keys(bestMetricsSVM);
for idx = 1:numel(svmMetricNames)
    name = svmMetricNames{idx};
    metricsMap = bestMetricsSVM(name);
    fprintf('Model: %s\n', name);
    fprintf('Recall: %s\n', num2str(metricsMap('Recall')));
    disp('-----------------------------------------------');
end
%% Final Evaluation
% Re-fit each model with its best cross-validated hyperparameters and score
% it once on the held-out test set. PCA variants receive the reduced
% feature matrices; the "Without PCA" variants receive the originals.
[xTrain, yTrain] = DataPreparation.featureSelection(trainingSet);
[xTest, yTest] = DataPreparation.featureSelection(testSet);
[xTrainReduced, xTestReduced] = DataPreparation.principalComponentAnalysis(xTrain, xTest);

% Flat {name, handle, name, handle, ...} list. The With/Without-PCA pairs
% share one model function; only the data fed to them differs (see switch).
models = {
    'Logistic Regression Without PCA', ...
    @(xTrain, xTest, yTrain, iterations, alpha, lambda, withRegularization) Models.logisticRegression(xTrain, xTest, yTrain, iterations, alpha, lambda, withRegularization), ...
    'Logistic Regression With PCA', ...
    @(xTrain, xTest, yTrain, iterations, alpha, lambda, withRegularization) Models.logisticRegression(xTrain, xTest, yTrain, iterations, alpha, lambda, withRegularization), ...
    'SVM Without PCA', ...
    @(xTrain, xTest, yTrain, kernel) Models.supportVectorMachine(xTrain, xTest, yTrain, kernel), ...
    'SVM With PCA', ...
    @(xTrain, xTest, yTrain, kernel) Models.supportVectorMachine(xTrain, xTest, yTrain, kernel), ...
    'K-Means Without PCA', ...
    @(xTrain, xTest, iterations, numClusters) Models.kMeans(xTrain, xTest, iterations, numClusters), ...
    'K-Means With PCA', ...
    @(xTrain, xTest, iterations, numClusters) Models.kMeans(xTrain, xTest, iterations, numClusters)
};

% Final Evaluation of all Models (step by 2: name at modelIdx, handle next).
for modelIdx = 1:2:length(models)
    modelName = models{modelIdx};
    modelFunction = models{modelIdx + 1};
    % BUGFIX: use an explicit '%s' format spec instead of concatenating the
    % name into the format string, where a '%' or '\' in the name would be
    % misread by fprintf as a conversion/escape sequence.
    fprintf('\nFINAL EVALUATION FOR %s:\n', modelName);
    switch modelName
        case 'Logistic Regression Without PCA'
            hyperparams = bestHyperparamsLR(modelName);
            predictions = modelFunction(xTrain, xTest, yTrain, iterations, hyperparams('Alpha'), hyperparams('Lambda'), withRegularization);
            Metrics.computeClassificationMetrics(yTest, predictions);
        case 'Logistic Regression With PCA'
            hyperparams = bestHyperparamsLR(modelName);
            predictions = modelFunction(xTrainReduced, xTestReduced, yTrain, iterations, hyperparams('Alpha'), hyperparams('Lambda'), withRegularization);
            Metrics.computeClassificationMetrics(yTest, predictions);
        case 'SVM Without PCA'
            hyperparams = bestHyperparamsSVM(modelName);
            predictions = modelFunction(xTrain, xTest, yTrain, hyperparams('Kernel'));
            Metrics.computeClassificationMetrics(yTest, predictions);
        case 'SVM With PCA'
            hyperparams = bestHyperparamsSVM(modelName);
            predictions = modelFunction(xTrainReduced, xTestReduced, yTrain, hyperparams('Kernel'));
            Metrics.computeClassificationMetrics(yTest, predictions);
        case 'K-Means Without PCA'
            % Unsupervised: no labels passed in; clustering metrics instead.
            predictions = modelFunction(xTrain, xTest, iterations, numClusters);
            Metrics.computeClusteringMetrics(xTest, yTest, predictions);
        case 'K-Means With PCA'
            predictions = modelFunction(xTrainReduced, xTestReduced, iterations, numClusters);
            Metrics.computeClusteringMetrics(xTestReduced, yTest, predictions);
    end
    disp('-----------------------------------------------');
end