
Commit

fixes rasmusbergpalm#6. Thanks. Dropout added
rasmusbergpalm committed Oct 20, 2012
1 parent b829694 commit 54c1f48
Showing 6 changed files with 36 additions and 53 deletions.
NN/nnexamples.m: 31 changes (7 additions, 24 deletions)
@@ -32,31 +32,14 @@
disp([num2str(er * 100) '% error']);
figure; visualize(nn.W{1}', 1) %Visualize the weights

%% ex3: Train a denoising autoencoder (DAE) and use it to initialize the weights for a NN
DAE = nnsetup([784 100 784]);
DAE.lambda = 1e-5;
DAE.alpha = 1e-0;
opts.numepochs = 1;
opts.batchsize = 100;
% This is a bit of a hack so we can apply different noise for each epoch.
% We should apply the noise when selecting the batches really.
for i = 1 : 10
DAE = nntrain(DAE, train_x .* double(rand(size(train_x)) > 0.5), train_x, opts);
end

% Use the DAE weights and biases to initialize a standard NN
nn = nnsetup([784 100 10]);
nn.W{1} = DAE.W{1};
nn.b{1} = DAE.b{1};

nn.lambda = 1e-5;
nn.alpha = 1e-0;
opts.numepochs = 10;
opts.batchsize = 100;
%% ex3 using 800 800 hidden units w. dropout
nn = nnsetup([784 800 800 10]);
nn.dropoutFraction = 0.5;
nn.alpha = 1e-0; % Learning rate
opts.numepochs = 100; % Number of full sweeps through data
opts.batchsize = 1000; % Take a mean gradient step over this many samples
nn = nntrain(nn, train_x, train_y, opts);

[er, bad] = nntest(nn, test_x, test_y);

disp([num2str(er * 100) '% error']);
figure; visualize(DAE.W{1}', 1) % Visualize the DAE weights
figure; visualize(nn.W{1}', 1) % Visualize the NN weights
figure; visualize(nn.W{1}', 1) %Visualize the weights
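
As a quick illustration of what the new dropoutFraction field does to a hidden layer during training, here is a minimal standalone sketch (illustrative values only, not code from this commit; the actual masking lives in NN/nnff.m, shown next):

a = rand(1, 10);                        % activations of one hidden layer
dropoutFraction = 0.5;                  % as in the example above
mask = rand(size(a)) > dropoutFraction; % each unit survives with probability 0.5
a = a .* mask;                          % dropped units contribute exactly zero downstream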
NN/nnff.m: 6 changes (5 additions, 1 deletion)
@@ -7,7 +7,11 @@

%% feedforward pass
for i = 2 : n
net.a{i} = sigm(repmat(net.b{i - 1}', m, 1) + net.a{i - 1} * net.W{i - 1}' + net.eta * randn(m, numel(net.b{i - 1})));
net.a{i} = sigm(repmat(net.b{i - 1}', m, 1) + net.a{i - 1} * net.W{i - 1}');
if(net.dropoutFraction > 0 && i<n)
net.a{i} = net.a{i}.*(rand(size(net.a{i}))>net.dropoutFraction);
end

net.p{i} = 0.99 * net.p{i} + 0.01 * mean(net.a{i}, 1);
end

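Two things to note in this hunk: the old net.eta * randn(...) hidden-noise term is dropped in favour of dropout, and the i < n guard means the mask is applied to hidden layers only, never to the output layer. A sanity check of the mask's expected effect, as a standalone sketch using nothing beyond base MATLAB/Octave:

% With dropoutFraction = 0.5 each unit survives with probability 0.5,
% so the mean masked activation is about half the original value.
a = ones(1, 1e6);
mask = rand(size(a)) > 0.5;
fprintf('mean masked activation: %.3f (expect ~0.5)\n', mean(a .* mask));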
NN/nnsetup.m: 13 changes (7 additions, 6 deletions)
@@ -5,12 +5,13 @@

nn.size = size;
nn.n = numel(nn.size);
nn.alpha = 0.1; % learning rate
nn.lambda = 0; % L2 regularization
nn.beta = 0; % sparsity rate
nn.rho = 0.05; % sparsity target
nn.eta = 0; % hidden layer noise level.
nn.inl = 0; % input noise level. Used for Denoising AutoEncoders

nn.alpha = 0.1; % learning rate
nn.lambda = 0; % L2 regularization
nn.beta = 0; % sparsity rate
nn.rho = 0.05; % sparsity target
nn.inputZeroMaskedFraction = 0; % Used for Denoising AutoEncoders
nn.dropoutFraction = 0; % dropout level (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf)

for i = 2 : nn.n
nn.b{i - 1} = zeros(nn.size(i), 1); % biases
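Putting the new defaults together, a typical setup call after this commit might look like the following sketch (values are illustrative, assembled from the defaults and examples above):

nn = nnsetup([784 800 800 10]);  % inputs, two hidden layers, outputs
nn.alpha = 1;                    % learning rate (default is 0.1)
nn.dropoutFraction = 0.5;        % drop half of each hidden layer per pass
nn.inputZeroMaskedFraction = 0;  % leave at 0 unless training a denoising AE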
NN/nntrain.m: 6 changes (6 additions, 0 deletions)
@@ -17,6 +17,11 @@
kk = randperm(m);
for l = 1 : numbatches
batch_x = x(kk((l - 1) * batchsize + 1 : l * batchsize), :);
%add noise to input (for use in denoising autoencoder)
if(nn.inputZeroMaskedFraction ~= 0)
batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction);
end

batch_y = y(kk((l - 1) * batchsize + 1 : l * batchsize), :);

nn = nnff(nn, batch_x, batch_y);
@@ -35,3 +40,4 @@
disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mean squared error is ' num2str(nn.rL(end))]);
end
end

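The added zero-masking is equivalent to this standalone sketch (the batch size and the 0.5 fraction are illustrative, mirroring the SAE example below):

batch_x = rand(100, 784);                       % a batch of input rows
f = 0.5;                                        % inputZeroMaskedFraction
batch_x = batch_x .* (rand(size(batch_x)) > f); % zero each entry with probability f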
SAE/saeexamples.m: 26 changes (8 additions, 18 deletions)
@@ -1,39 +1,30 @@
clear all; close all; clc;

[pathstr, name, ext] = fileparts(mfilename('fullpath'));
addpath(strcat(pathstr, '/../data'));
addpath(strcat(pathstr, '/../util'));

load mnist_uint8;

train_x = double(train_x)/255;
test_x = double(test_x)/255;
train_y = double(train_y);
test_y = double(test_y);

%% ex1 train a 100-100 hidden unit SDAE and use it to initialize a FFNN
%% ex1 train a 100 hidden unit SDAE and use it to initialize a FFNN
% Setup and train a stacked denoising autoencoder (SDAE)
sae.size = [100 100];
sae = saesetup(sae, train_x);
sae = saesetup([784 100]);

sae.ae{1}.alpha = 1;
sae.ae{1}.inl = 0.5; % fraction of zero-masked inputs (the noise)
sae.ae{2}.alpha = 1;
sae.ae{2}.inl = 0.5; % fraction of zero-masked inputs (the noise)
sae.ae{1}.alpha = 0.5;
sae.ae{1}.inputZeroMaskedFraction = 0.5;

opts.numepochs = 5;
opts.batchsize = 100;

sae = saetrain(sae, train_x, opts);

figure; visualize(sae.ae{1}.W{1}', 1) % Visualize the weights

% use the SDAE to initialize a FFNN
nn.size = [100 100];
nn = nnsetup(nn, train_x, train_y);
nn = nnsetup([784 100 10]);

nn.W{1} = sae.ae{1}.W{1};
nn.b{1} = sae.ae{1}.b{1};
nn.W{2} = sae.ae{2}.W{1};
nn.b{2} = sae.ae{2}.b{1};
nn.lambda = 1e-5; % L2 weight decay
nn.alpha = 1e-0; % Learning rate

@@ -43,6 +34,5 @@
nn = nntrain(nn, train_x, train_y, opts);

[er, bad] = nntest(nn, test_x, test_y);
%disp([num2str(er * 100) '% error']);
printf('%5.2f% error', 100 * er);
disp([num2str(er * 100) '% error']);
figure; visualize(nn.W{1}', 1) % Visualize the weights
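
The weight copy above works because saesetup([784 100]) trains a [784 100 784] autoencoder, so its encoder parameters W{1} (100 x 784) and b{1} (100 x 1) have exactly the shapes that nnsetup([784 100 10]) expects for its first layer. A shape check, as a sketch with stand-in values:

W1 = randn(100, 784);   % stand-in for sae.ae{1}.W{1}
b1 = zeros(100, 1);     % stand-in for sae.ae{1}.b{1}
assert(isequal(size(W1), [100 784]) && isequal(size(b1), [100 1]));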
SAE/saesetup.m: 7 changes (3 additions, 4 deletions)
@@ -1,6 +1,5 @@
function sae = saesetup(sae, x)
for u = 1 : numel(sae.size)
sae.ae{u} = nnsetup(struct('size', sae.size(u)), x, x);
x = zeros(1, sae.size(u));
function sae = saesetup(size)
for u = 2 : numel(size)
sae.ae{u-1} = nnsetup([size(u-1) size(u) size(u-1)]);
end
end

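In the rewritten saesetup, each consecutive pair of layer sizes becomes one symmetric autoencoder. A usage sketch (assuming the toolbox is on the path; the sizes are illustrative):

sae = saesetup([784 100 50]);
% the loop creates one autoencoder per layer pair:
%   sae.ae{1} = nnsetup([784 100 784])   % first-level autoencoder
%   sae.ae{2} = nnsetup([100 50 100])    % second-level autoencoder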