diff --git a/NN/nnexamples.m b/NN/nnexamples.m
index 6b4e452..3a130d7 100644
--- a/NN/nnexamples.m
+++ b/NN/nnexamples.m
@@ -32,31 +32,14 @@
 disp([num2str(er * 100) '% error']);
 figure; visualize(nn.W{1}', 1) %Visualize the weights
 
-%% ex3: Train a denoising autoencoder (DAE) and use it to initialize the weights for a NN
-DAE = nnsetup([784 100 784]);
-DAE.lambda = 1e-5;
-DAE.alpha = 1e-0;
-opts.numepochs = 1;
-opts.batchsize = 100;
-% This is a bit of a hack so we can apply different noise for each epoch.
-% We should apply the noise when selecting the batches really.
-for i = 1 : 10
-    DAE = nntrain(DAE, train_x .* double(rand(size(train_x)) > 0.5), train_x, opts);
-end
-
-% Use the DAE weights and biases to initialize a standard NN
-nn = nnsetup([784 100 10]);
-nn.W{1} = DAE.W{1};
-nn.b{1} = DAE.b{1};
-
-nn.lambda = 1e-5;
-nn.alpha = 1e-0;
-opts.numepochs = 10;
-opts.batchsize = 100;
+%% ex3 using 800 800 hidden units w. dropout
+nn = nnsetup([784 800 800 10]);
+nn.dropoutFraction = 0.5;
+nn.alpha = 1e-0;       % Learning rate
+opts.numepochs = 100;  % Number of full sweeps through data
+opts.batchsize = 1000; % Take a mean gradient step over this many samples
 nn = nntrain(nn, train_x, train_y, opts);
 
 [er, bad] = nntest(nn, test_x, test_y);
-
 disp([num2str(er * 100) '% error']);
-figure; visualize(DAE.W{1}', 1) % Visualize the DAE weights
-figure; visualize(nn.W{1}', 1) % Visualize the NN weights
+figure; visualize(nn.W{1}', 1) %Visualize the weights
\ No newline at end of file
diff --git a/NN/nnff.m b/NN/nnff.m
index f6d3d64..02f8011 100644
--- a/NN/nnff.m
+++ b/NN/nnff.m
@@ -7,7 +7,11 @@
 
     %% feedforward pass
     for i = 2 : n
-        net.a{i} = sigm(repmat(net.b{i - 1}', m, 1) + net.a{i - 1} * net.W{i - 1}' + net.eta * randn(m, numel(net.b{i - 1})));
+        net.a{i} = sigm(repmat(net.b{i - 1}', m, 1) + net.a{i - 1} * net.W{i - 1}');
+        if(net.dropoutFraction > 0 && i<n)
+            net.a{i} = net.a{i}.*(rand(size(net.a{i}))>net.dropoutFraction);
+        end
+
         net.p{i} = 0.99 * net.p{i} + 0.01 * mean(net.a{i}, 1);
     end
 
diff --git a/NN/nnsetup.m b/NN/nnsetup.m
index df8ba78..479b312 100644
--- a/NN/nnsetup.m
+++ b/NN/nnsetup.m
@@ -5,12 +5,13 @@
     nn.size = size;
     nn.n = numel(nn.size);
 
-    nn.alpha = 0.1;  % learning rate
-    nn.lambda = 0;   % L2 regularization
-    nn.beta = 0;     % sparsity rate
-    nn.rho = 0.05;   % sparsity target
-    nn.eta = 0;      % hidden layer noise level.
-    nn.inl = 0;      % input noise level. Used for Denoising AutoEncoders
+
+    nn.alpha = 0.1;                  % learning rate
+    nn.lambda = 0;                   % L2 regularization
+    nn.beta = 0;                     % sparsity rate
+    nn.rho = 0.05;                   % sparsity target
+    nn.inputZeroMaskedFraction = 0;  % Used for Denoising AutoEncoders
+    nn.dropoutFraction = 0;          % dropout level (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf)
 
     for i = 2 : nn.n
         nn.b{i - 1} = zeros(nn.size(i), 1); % biases
diff --git a/NN/nntrain.m b/NN/nntrain.m
index c64f7c9..f05cf94 100644
--- a/NN/nntrain.m
+++ b/NN/nntrain.m
@@ -17,6 +17,11 @@
         kk = randperm(m);
         for l = 1 : numbatches
             batch_x = x(kk((l - 1) * batchsize + 1 : l * batchsize), :);
+            %add noise to input (for use in denoising autoencoder)
+            if(nn.inputZeroMaskedFraction ~= 0)
+                batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction);
+            end
+
             batch_y = y(kk((l - 1) * batchsize + 1 : l * batchsize), :);
 
             nn = nnff(nn, batch_x, batch_y);
@@ -35,3 +40,4 @@
         disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mean squared error is ' num2str(nn.rL(end))]);
     end
 end
+
diff --git a/SAE/saeexamples.m b/SAE/saeexamples.m
index a7bdfe5..6410a43 100644
--- a/SAE/saeexamples.m
+++ b/SAE/saeexamples.m
@@ -1,9 +1,4 @@
 clear all; close all; clc;
-
-[pathstr, name, ext] = fileparts(mfilename('fullpath'));
-addpath(strcat(pathstr, '/../data'));
-addpath(strcat(pathstr, '/../util'));
-
 load mnist_uint8;
 
 train_x = double(train_x)/255;
@@ -11,29 +6,25 @@
 train_y = double(train_y);
 test_y = double(test_y);
 
-%% ex1 train a 100-100 hidden unit SDAE and use it to initialize a FFNN
+%% ex1 train a 100 hidden unit SDAE and use it to initialize a FFNN
 % Setup and train a stacked denoising autoencoder (SDAE)
-sae.size = [100 100];
-sae = saesetup(sae, train_x);
+sae = saesetup([784 100]);
 
-sae.ae{1}.alpha = 1;
-sae.ae{1}.inl = 0.5; % fraction of zero-masked inputs (the noise)
-sae.ae{2}.alpha = 1;
-sae.ae{2}.inl = 0.5; % fraction of zero-masked inputs (the noise)
+sae.ae{1}.alpha = 0.5;
+sae.ae{1}.inputZeroMaskedFraction = 0.5;
 
 opts.numepochs = 5;
 opts.batchsize = 100;
 sae = saetrain(sae, train_x, opts);
+figure; visualize(sae.ae{1}.W{1}', 1) % Visualize the weights
+
 % use the SDAE to initialize a FFNN
-nn.size = [100 100];
-nn = nnsetup(nn, train_x, train_y);
+nn = nnsetup([784 100 10]);
 nn.W{1} = sae.ae{1}.W{1};
 nn.b{1} = sae.ae{1}.b{1};
-nn.W{2} = sae.ae{2}.W{1};
-nn.b{2} = sae.ae{2}.b{1};
 
 nn.lambda = 1e-5; % L2 weight decay
 nn.alpha = 1e-0;  % Learning rate
@@ -43,6 +34,5 @@
 nn = nntrain(nn, train_x, train_y, opts);
 
 [er, bad] = nntest(nn, test_x, test_y);
-%disp([num2str(er * 100) '% error']);
-printf('%5.2f% error', 100 * er);
+disp([num2str(er * 100) '% error']);
 figure; visualize(nn.W{1}', 1) % Visualize the weights
diff --git a/SAE/saesetup.m b/SAE/saesetup.m
index b12c61a..aee4f3f 100644
--- a/SAE/saesetup.m
+++ b/SAE/saesetup.m
@@ -1,6 +1,5 @@
-function sae = saesetup(sae, x)
-    for u = 1 : numel(sae.size)
-        sae.ae{u} = nnsetup(struct('size', sae.size(u)), x, x);
-        x = zeros(1, sae.size(u));
+function sae = saesetup(size)
+    for u = 2 : numel(size)
+        sae.ae{u-1} = nnsetup([size(u-1) size(u) size(u-1)]);
     end
 end
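
The denoising-autoencoder example that this patch removes from nnexamples.m can still be written with the new inputZeroMaskedFraction option, since nntrain now zero-masks each mini-batch itself. A minimal sketch, not part of the patch: it only reuses nnsetup/nntrain and the fields introduced above, and the layer sizes, learning rate, and epoch count are illustrative values carried over from the old example.

% Sketch: DAE pretraining via inputZeroMaskedFraction instead of the old manual noise loop
dae = nnsetup([784 100 784]);        % autoencoder: reconstruct the 784-dim input through 100 hidden units
dae.alpha = 1e-0;                    % learning rate
dae.inputZeroMaskedFraction = 0.5;   % nntrain zeroes this fraction of each batch's inputs
opts.numepochs = 10;
opts.batchsize = 100;
dae = nntrain(dae, train_x, train_x, opts);   % target is the clean input

% use the pretrained weights to initialize the first layer of a classifier
nn = nnsetup([784 100 10]);
nn.W{1} = dae.W{1};
nn.b{1} = dae.b{1};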