Skip to content

Commit

Permalink
Parametrice report of sum of matrices. Closes #29. Related #35
Browse files Browse the repository at this point in the history
  • Loading branch information
javism committed Jan 31, 2018
1 parent fe91667 commit c59baba
Show file tree
Hide file tree
Showing 21 changed files with 229 additions and 259 deletions.
1 change: 1 addition & 0 deletions doc/orca-tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ ORCA uses the `Experiments` folder to store all the results of the different exp
- Optimal hyper-parameters values obtained after nested cross-validation ('OptHyperparams').
- Computational time results ('Times').

If you set the option `report_sum = true` in `{general-conf}`, the same metrics will additionally be calculated from a single matrix that is the sum of the generalization matrices (as Weka does). **Note that this only makes sense in the case of a k-fold experimental design**. With this option active, two additional reports will be generated (`mean-results_matrices_sum_train.csv` and `mean-results_matrices_sum_test.csv`).

## Running algorithms with ORCA API

Expand Down
16 changes: 0 additions & 16 deletions src/Config.m
Original file line number Diff line number Diff line change
Expand Up @@ -80,22 +80,6 @@

obj.exps{i}=eObj;
end


% % TODO
% if Config.validateconfig(exps)
% return
% else
% error('Invalid INI file %s', confFile)
% end
end
end

methods(Static = true, Access = private)

function valid = validateconfig(exps)
%VALIDATECONFIG Validates set of experiments
%   VALID = VALIDATECONFIG(EXPS) is a placeholder for checking the set of
%   experiments EXPS.
%
%   TODO: not implemented. The body is empty, so the output VALID is never
%   assigned; requesting the return value will raise an error.
end
end
end
Expand Down
7 changes: 6 additions & 1 deletion src/Experiment.m
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
cvCriteria = MAE;
crossvalide = 0;
resultsDir = '';
% calculate metrics with the sum of matrices (only suitable for
% k-fold experimental design)
report_sum = 0;
seed = 1;
parameters; % parameters to optimize
end
Expand Down Expand Up @@ -81,7 +84,6 @@
% Copy ini values to corresponding object properties

% General experiment properties
% TODO: check robustness and document behaviour of ini file
if expObj.general.isKey('num_folds')
obj.data.nOfFolds = str2num(expObj.general('num_folds'));
end
Expand All @@ -95,6 +97,9 @@
if expObj.general.isKey('seed')
obj.seed = str2num(expObj.general('seed'));
end
if expObj.general.isKey('report_sum')
obj.report_sum = str2num(expObj.general('report_sum'));
end

try
obj.data.directory = expObj.general('directory');
Expand Down
124 changes: 52 additions & 72 deletions src/Utilities.m
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@

disp('Calculating results...');
% Train results (note last argument)
Utilities.results([logsDir '/' 'Results'],1);

Utilities.results([logsDir '/' 'Results'],'report_sum', myExperiment.report_sum, 'train', true);
% Test results
Utilities.results([logsDir '/' 'Results']);
Utilities.results([logsDir '/' 'Results'], 'report_sum', myExperiment.report_sum);
%rmpath('Measures');
%rmpath('Algorithms');

Expand All @@ -116,7 +117,7 @@ function octaveParallelAuxFunction(experimentToRun,logsDir)
end
end

function results(experiment_folder,train)
function results(experiment_folder,varargin)
% RESULTS Function for computing the results
% RESULTS(EXPERIMENT_FOLDER) computes results of predictions
% stored in EXPERIMENT_FOLDER. It generates CSV files with
Expand All @@ -131,7 +132,7 @@ function results(experiment_folder,train)
% confusion matrices of the _k_ experiments (as Weka does). Each column
% presents the performance of this single matrix.
%
% RESULTS(EXPERIMENT_FOLDER,TRAIN) same as
% RESULTS(EXPERIMENT_FOLDER,'TRAIN', true) same as
% RESULTS(EXPERIMENT_FOLDER) but calculates performance in train
% data. It can be useful to evaluate overfitting.
%
Expand All @@ -142,24 +143,21 @@ function results(experiment_folder,train)
addpath(fullfile(fileparts(which('Utilities.m')),'Measures'));
addpath(fullfile(fileparts(which('Utilities.m')),'Algorithms'));

if nargin < 2
train = 0;
elseif nargin == 1
train = train;
end
experiments = dir(experiment_folder);

%idx=strfind(experiment_folder,'Results');
%scriptpath = [experiment_folder(1:idx-1)];
opt.train = false;
opt.report_sum = false;

opt = Utilities.parseVarArgs(opt, varargin);

experiments = dir(experiment_folder);

for i=1:numel(experiments)
if ~(any(strcmp(experiments(i).name, {'.', '..'}))) && experiments(i).isdir
disp([experiment_folder '/' experiments(i).name '/' 'dataset'])
fid = fopen([experiment_folder '/' experiments(i).name '/' 'dataset'],'r');
datasetPath = fgetl(fid);
fclose(fid);

if train == 1
if opt.train
predicted_files = dir([experiment_folder '/' experiments(i).name '/' 'Predictions' '/' 'train_*']);
else
predicted_files = dir([experiment_folder '/' experiments(i).name '/' 'Predictions' '/' 'test_*']);
Expand All @@ -175,27 +173,19 @@ function results(experiment_folder,train)
time_files = dir([experiment_folder '/' experiments(i).name '/' 'Times' '/' '*.*']);
hyp_files = dir([experiment_folder '/' experiments(i).name '/' 'OptHyperparams' '/' '*.*']);

if train == 1
if opt.train
guess_files = dir([experiment_folder '/' experiments(i).name '/' 'Guess' '/' 'train_*']);
else
guess_files = dir([experiment_folder '/' experiments(i).name '/' 'Guess' '/' 'test_*']);
end

%str=predicted_files(1).name;
%[matchstart,matchend] = regexp( str,'_(.+)\.\d+');
%dataset=str(matchstart+1:matchend-2);

%auxscript = experimentos(i).name;
%[matchstart,matchend]=regexp(auxscript,dataset);
%basescript = ['exp-' auxscript(matchend+2:end) '-' dataset '-'];

% Discard "." and ".."
if ~(exist ('OCTAVE_VERSION', 'builtin') > 0)
time_files = time_files(3:numel(time_files));
hyp_files = hyp_files(3:numel(hyp_files));
end

if train == 1
if opt.train
real_files = dir([datasetPath '/' 'train_*']);
else
real_files = dir([datasetPath '/' 'test_*']);
Expand Down Expand Up @@ -263,7 +253,7 @@ function results(experiment_folder,train)
results_matrix = results_matrix';

% Results for the independent dataset
if train == 1
if opt.train
fid = fopen([experiment_folder '/' experiments(i).name '/' 'results_train.csv'],'w');
else
fid = fopen([experiment_folder '/' experiments(i).name '/' 'results_test.csv'],'w');
Expand All @@ -284,9 +274,8 @@ function results(experiment_folder,train)
fclose(fid);

% Confusion matrices and sum of confusion matrices
% TODO PARAMETRIZAR
if false
if train == 1
if opt.report_sum
if opt.train
fid = fopen([experiment_folder '/' experiments(i).name '/' 'matrices_train.txt'],'w');
else
fid = fopen([experiment_folder '/' experiments(i).name '/' 'matrices_test.txt'],'w');
Expand Down Expand Up @@ -326,7 +315,7 @@ function results(experiment_folder,train)
means = mean(results_matrix,1);
stdev = std(results_matrix,0,1);

if train == 1
if opt.train
if ~exist([experiment_folder '/' 'mean-results_train.csv'],'file')
add_head = 1;
else
Expand Down Expand Up @@ -363,9 +352,8 @@ function results(experiment_folder,train)


% Confusion matrices and sum of confusion matrices
% TODO PARAMETRIZAR
if false
if train == 1
if opt.report_sum
if opt.train
fid = fopen([experiment_folder '/' 'mean-results_matrices_sum_train.csv'],'at');
else
fid = fopen([experiment_folder '/' 'mean-results_matrices_sum_test.csv'],'at');
Expand Down Expand Up @@ -503,22 +491,6 @@ function runExperimentFold(confFile)
dbs(1) = [];
validDataSets = 1;

% Currently, 'all' is not working
%if strcmpi(dataSetNames{1}, 'all')
% trainFileNames = cell(size(dbs,1),1);
% testFileNames = cell(size(dbs,1),1);
% for dd=1:size(dbs,1)
% % get directory
% if dbs(dd).isdir,
% ejemplo = [directory '/' dbs(dd).name '/' 'matlab' '/' 'train_' dbs(dd).name '.*'];
% trainFileNames{validDataSets} = dir(ejemplo);
% ejemplo = [directory '/' dbs(dd).name '/' 'matlab' '/' 'test_' dbs(dd).name '.*'];
% testFileNames{validDataSets} = dir(ejemplo);
% validDataSets = validDataSets + 1;
% end
%
% end
%else
trainFileNames = cell(numel(dataSetNames),1);
testFileNames = cell(numel(dataSetNames),1);
for j=1:numel(dataSetNames)
Expand All @@ -531,7 +503,6 @@ function runExperimentFold(confFile)
validDataSets = validDataSets + 1;
end
end
%end
end

function checkDatasets(basedir, datasets)
Expand Down Expand Up @@ -647,39 +618,48 @@ function closePool()
% - 'closepool': whether to close or not the pool after
% experiments. Default 'true'
% Solution adapted from https://stackoverflow.com/questions/2775263/how-to-deal-with-name-value-pairs-of-function-arguments-in-matlab#2776238

if (exist ('OCTAVE_VERSION', 'builtin') > 0)
maximum_ncores = nproc;
else
maximum_ncores = feature('numCores');
end

varargin = varargin{1};

options = struct('parallel',false,'numcores',maximum_ncores,'closepool',true);

%# read the acceptable names
optionNames = fieldnames(options);

%# count arguments
nArgs = length(varargin);
if mod(nArgs,2)
error('parseParArgs needs propertyName/propertyValue pairs')
end

for pair = reshape(varargin,2,[]) %# pair is {propName;propValue}
inpName = lower(pair{1}); %# make case insensitive

if any(strcmp(inpName,optionNames))
%# overwrite options.
options.(inpName) = pair{2};
else
error('%s is not a recognized parameter name',inpName)
varargin = varargin{:};
if ~isempty(varargin)
options = Utilities.parseVarArgs(options, varargin);
if options.parallel && options.numcores <2
disp('Number of cores to low, setting to default number of cores')
options.numcores = maximum_ncores;
end
end

if options.parallel && options.numcores <2
disp('Number of cores to low, setting to default number of cores')
options.numcores = maximum_ncores;
end

function options = parseVarArgs(options, varargin)
% PARSEVARARGS Overwrite default options with name/value pairs.
%   OPTIONS = PARSEVARARGS(OPTIONS, ARGS) receives a struct OPTIONS whose
%   fields hold the default values and a cell array ARGS of the form
%   {'name1', value1, 'name2', value2, ...}. Every named field is
%   overwritten with the supplied value and the updated struct is returned.
%   Names are matched case-insensitively against the fields of OPTIONS.
%
%   OPTIONS = PARSEVARARGS(OPTIONS, 'name1', value1, ...) accepts the
%   pairs directly instead of packed in a cell array.
%
%   OPTIONS = PARSEVARARGS(OPTIONS) and OPTIONS = PARSEVARARGS(OPTIONS, {})
%   return the defaults untouched.
%
%   An error is raised if the arguments do not come in pairs, if a name
%   is not a character array, or if a name is not a field of OPTIONS.

    % Callers usually forward their own varargin as a single cell array;
    % unwrap it. Otherwise treat the remaining arguments as the pairs.
    if numel(varargin) == 1 && iscell(varargin{1})
        par = varargin{1};
    else
        par = varargin;
    end

    % Nothing to override: keep the defaults.
    if isempty(par)
        return
    end

    % read the acceptable names
    optionNames = fieldnames(options);

    % count arguments
    nArgs = numel(par);
    if mod(nArgs,2)
        error('parseVarArgs needs propertyName/propertyValue pairs')
    end

    for k = 1:2:nArgs
        if ~ischar(par{k})
            error('parseVarArgs needs propertyName/propertyValue pairs')
        end
        inpName = lower(par{k}); % make case insensitive

        % Prefer the exact lowercase field (original behaviour); fall
        % back to a case-insensitive match so that fields declared with
        % uppercase letters can also be addressed.
        idx = find(strcmp(inpName, optionNames), 1);
        if isempty(idx)
            idx = find(strcmpi(inpName, optionNames), 1);
        end

        if isempty(idx)
            error('%s is not a recognized parameter name',inpName)
        end

        % overwrite options.
        options.(optionNames{idx}) = par{k+1};
    end
end
end
Expand Down
24 changes: 12 additions & 12 deletions src/config-files/cssvc.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
{general-conf}
seed = 1
; Datasets path
basedir = ../../../datasets/ordinal/real/30-holdout
basedir = ../../../datasets/ordinal-regression
; Datasets to process (comma separated list or all to process all)
datasets = automobile,balance-scale,bondrate,car,contact-lenses,ERA,ESL,eucalyptus,LEV,marketing,newthyroid,pasture,squash-stored,squash-unstored,SWD,tae,thyroid,toy,winequality-red,winequality-white
datasets = all
; Activate data standardization
standarize = true
; Number of folds for the parameters optimization
Expand All @@ -32,8 +32,8 @@ k = 10.^(-3:1:3)
[cssvc-mae-regression5]
{general-conf}
seed = 1
basedir = ../../../datasets/ordinal/regression/5bins
datasets = abalone,housing,machine,pyrim,stock,bank1-5,bank2-5,calhousing-5,census1-5,census2-5,computer1-5,computer2-5
basedir = ../../../datasets/discretized-regression/5bins
datasets = all
standarize = true
num_folds = 5
cvmetric = mae
Expand All @@ -50,8 +50,8 @@ k = 10.^(-3:1:3)
[cssvc-mae-regression10]
{general-conf}
seed = 1
basedir = ../../../datasets/ordinal/regression/10bins
datasets = abalone10,housing10,machine10,pyrim10,stock10bank1-10,bank2-10,calhousing-10,census1-10,census2-10,computer1-10,computer2-10
basedir = ../../../datasets/discretized-regression/10bins
datasets = all
standarize = true
num_folds = 5
cvmetric = mae
Expand All @@ -67,8 +67,8 @@ k = 10.^(-3:1:3)
[cssvc-mze-real]
{general-conf}
seed = 1
basedir = ../../../datasets/ordinal/real/30-holdout
datasets = automobile,balance-scale,bondrate,car,contact-lenses,ERA,ESL,eucalyptus,LEV,marketing,newthyroid,pasture,squash-stored,squash-unstored,SWD,tae,thyroid,toy,winequality-red,winequality-white
basedir = ../../../datasets/ordinal-regression
datasets = all
standarize = true
num_folds = 5
cvmetric = mze
Expand All @@ -84,8 +84,8 @@ k = 10.^(-3:1:3)
[cssvc-mze-regression5]
{general-conf}
seed = 1
basedir = ../../../datasets/ordinal/regression/5bins
datasets = abalone,housing,machine,pyrim,stock,bank1-5,bank2-5,calhousing-5,census1-5,census2-5,computer1-5,computer2-5
basedir = ../../../datasets/discretized-regression/5bins
datasets = all
standarize = true
num_folds = 5
cvmetric = mze
Expand All @@ -102,8 +102,8 @@ k = 10.^(-3:1:3)
[cssvc-mze-regression10]
{general-conf}
seed = 1
basedir = ../../../datasets/ordinal/regression/10bins
datasets = abalone10,housing10,machine10,pyrim10,stock10bank1-10,bank2-10,calhousing-10,census1-10,census2-10,computer1-10,computer2-10
basedir = ../../../datasets/discretized-regression/10bins
datasets = all
standarize = true
num_folds = 5
cvmetric = mze
Expand Down
Loading

0 comments on commit c59baba

Please sign in to comment.