Version 0.9.1 of new exercises. Copied from private repo
amaas committed Sep 25, 2013
1 parent 530aa9e commit a3816ee
Showing 9 changed files with 40 additions and 37 deletions.
6 changes: 4 additions & 2 deletions cnn/cnnConvolve.m
@@ -41,7 +41,9 @@

% convolution of image with feature matrix
convolvedImage = zeros(convDim, convDim);

% Obtain the feature (filterDim x filterDim) needed during the convolution

%%% YOUR CODE HERE %%%

% Flip the feature matrix because of the definition of convolution, as explained later
@@ -52,13 +54,13 @@

% Convolve "filter" with "im", adding the result to convolvedImage
% be sure to do a 'valid' convolution
%%% YOUR CODE HERE %%%

%%% YOUR CODE HERE %%%

% Add the bias unit
% Then, apply the sigmoid function to get the hidden activation
%%% YOUR CODE HERE %%%

%%% YOUR CODE HERE %%%


convolvedFeatures(:, :, filterNum, imageNum) = convolvedImage;
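
For reference, a minimal sketch of what the placeholders in this file might contain, assuming the filter weights and biases reach cnnConvolve as W (filterDim x filterDim x numFilters) and b (numFilters x 1); those names are not shown in this excerpt:

    % Hypothetical fill-in; W, b and images are assumed variable names.
    filter = W(:, :, filterNum);              % obtain the feature for this filter
    filter = rot90(squeeze(filter), 2);       % flip it, since conv2 implements true convolution
    im = squeeze(images(:, :, imageNum));     % current input image

    convolvedImage = conv2(im, filter, 'valid');        % 'valid' convolution
    convolvedImage = convolvedImage + b(filterNum);     % add the bias unit
    convolvedImage = 1 ./ (1 + exp(-convolvedImage));   % sigmoid activation
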
28 changes: 13 additions & 15 deletions cnn/cnnCost.m
@@ -1,16 +1,19 @@
function [cost, grad, preds] = cnnCost(theta,images,labels,numClasses,...
filterDim,numFilters,poolDim,pred)
- % Calculate cost and gradient for a single layer convolutional neural
- % network followed by a softmax layer with cross entropy objective.
+ % Calculate cost and gradient for a single layer convolutional
+ % neural network followed by a softmax layer with cross entropy
+ % objective.
%
% Parameters:
% theta - unrolled parameter vector
- % images - stores images in imageDim x imageDim x numImages array
+ % images - stores images in imageDim x imageDim x numImages
+ %          array
% numClasses - number of classes to predict
% filterDim - dimension of convolutional filter
% numFilters - number of convolutional filters
% poolDim - dimension of pooling area
- % pred - boolean only forward propagate and return predictions
+ % pred - boolean only forward propagate and return
+ %        predictions
%
%
% Returns:
@@ -32,19 +35,18 @@
% Wc is filterDim x filterDim x numFilters parameter matrix
% bc is the corresponding bias

- % Wd is numClasses x hiddenSize parameter matrix where hiddenSize is the
- % number of output units from the convolutional layer
+ % Wd is numClasses x hiddenSize parameter matrix where hiddenSize
+ % is the number of output units from the convolutional layer
% bd is corresponding bias
[Wc, Wd, bc, bd] = cnnParamsToStack(theta,imageDim,filterDim,numFilters,...
poolDim,numClasses);

% Same sizes as Wc,Wd,bc,bd. Used to hold gradient w.r.t above params.
% Same sizes as Wc,Wd,bc,bd. Used to hold gradient w.r.t above params.
Wc_grad = zeros(size(Wc));
Wd_grad = zeros(size(Wd));
bc_grad = zeros(size(bc));
bd_grad = zeros(size(bd));


%%======================================================================
%% STEP 1a: Forward Propagation
% In this step you will forward propagate the input through the
@@ -71,7 +73,6 @@

%%% YOUR CODE HERE %%%


% Reshape activations into 2-d matrix, hiddenSize x numImages,
% for Softmax layer
activationsPooled = reshape(activationsPooled,[],numImages);
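
A possible shape for the STEP 1a placeholder, assuming cnnConvolve and cnnPool (the files changed in this commit) take the arguments their names suggest; the variable name activations and the exact signatures are assumptions:

    % Hypothetical STEP 1a fill-in (cnnConvolve/cnnPool signatures assumed):
    activations = cnnConvolve(filterDim, numFilters, images, Wc, bc);  % convDim x convDim x numFilters x numImages
    activationsPooled = cnnPool(poolDim, activations);                 % pooled feature maps
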
@@ -88,7 +89,6 @@

%%% YOUR CODE HERE %%%


%%======================================================================
%% STEP 1b: Calculate Cost
% In this step you will use the labels given as input and the probs
@@ -99,7 +99,6 @@

%%% YOUR CODE HERE %%%


% Makes predictions given probs and returns without backpropagating errors.
if pred
[~,preds] = max(probs,[],1);
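
Sketches of the two placeholders above (the softmax probabilities and the STEP 1b cross-entropy cost), assuming labels holds class indices from 1 to numClasses:

    % Hypothetical softmax forward pass:
    z = bsxfun(@plus, Wd * activationsPooled, bd);    % numClasses x numImages scores
    z = bsxfun(@minus, z, max(z, [], 1));             % subtract the column max for numerical stability
    probs = exp(z);
    probs = bsxfun(@rdivide, probs, sum(probs, 1));   % columns sum to one

    % Hypothetical cross-entropy cost:
    idx  = sub2ind(size(probs), labels(:)', 1:numImages);  % linear index of the true class per image
    cost = -sum(log(probs(idx))) / numImages;
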
@@ -121,17 +120,16 @@
%%% YOUR CODE HERE %%%

%%======================================================================
- %% STEP 1c: Gradient Calculation
+ %% STEP 1d: Gradient Calculation
% After backpropagating the errors above, we can use them to calculate the
% gradient with respect to all the parameters. The gradient w.r.t the
% softmax layer is calculated as usual. To calculate the gradient w.r.t.
% a filter in the convolutional layer, convolve the backpropagated error
- % for that fileter with each image and aggregate over images.
+ % for that filter with each image and aggregate over images.

%%% YOUR CODE HERE %%%


%% Unroll gradient into grad vector for minFunc
grad = [Wc_grad(:) ; Wd_grad(:) ; bc_grad(:) ; bd_grad(:)];

end
end
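
For STEP 1c/1d, a rough sketch of how the errors might be propagated back and turned into gradients. It assumes mean pooling in the forward pass, a sigmoid convolutional layer, and the variable names activations and probs from the sketches above; none of these are confirmed by the diff:

    % Hypothetical backpropagation and gradient computation (mean pooling assumed):
    outputDim    = (imageDim - filterDim + 1) / poolDim;   % pooled feature-map size
    groundTruth  = full(sparse(labels, 1:numImages, 1, numClasses, numImages));
    deltaSoftmax = probs - groundTruth;                    % numClasses x numImages

    Wd_grad = deltaSoftmax * activationsPooled' / numImages;
    bd_grad = sum(deltaSoftmax, 2) / numImages;

    deltaPooled = reshape(Wd' * deltaSoftmax, outputDim, outputDim, numFilters, numImages);
    for imageNum = 1:numImages
        for filterNum = 1:numFilters
            % spread the pooled error back over each poolDim x poolDim region,
            % then multiply by the sigmoid derivative
            deltaConv = kron(deltaPooled(:, :, filterNum, imageNum), ones(poolDim)) / poolDim^2;
            a = activations(:, :, filterNum, imageNum);
            deltaConv = deltaConv .* a .* (1 - a);
            % gradient for this filter: convolve the flipped error with the input image
            Wc_grad(:, :, filterNum) = Wc_grad(:, :, filterNum) + ...
                conv2(images(:, :, imageNum), rot90(deltaConv, 2), 'valid');
            bc_grad(filterNum) = bc_grad(filterNum) + sum(deltaConv(:));
        end
    end
    Wc_grad = Wc_grad / numImages;
    bc_grad = bc_grad / numImages;
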
3 changes: 2 additions & 1 deletion cnn/cnnInitParams.m
@@ -28,7 +28,8 @@
outDim = outDim/poolDim;
hiddenSize = outDim^2*numFilters;

- r = sqrt(6) / sqrt(numClasses+hiddenSize+1); % we'll choose weights uniformly from the interval [-r, r]
+ % we'll choose weights uniformly from the interval [-r, r]
+ r = sqrt(6) / sqrt(numClasses+hiddenSize+1);
Wd = rand(numClasses, hiddenSize) * 2 * r - r;

bc = zeros(numFilters, 1);
1 change: 0 additions & 1 deletion cnn/cnnPool.m
@@ -31,6 +31,5 @@

%%% YOUR CODE HERE %%%


end

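The body of cnnPool is not shown in this hunk; a minimal sketch under the assumption that it performs mean pooling over non-overlapping poolDim x poolDim regions, with inputs and outputs named convolvedFeatures and pooledFeatures (assumed names):

    % Hypothetical mean-pooling fill-in (all names assumed):
    numImages    = size(convolvedFeatures, 4);
    numFilters   = size(convolvedFeatures, 3);
    convolvedDim = size(convolvedFeatures, 1);
    pooledFeatures = zeros(convolvedDim / poolDim, convolvedDim / poolDim, numFilters, numImages);

    for imageNum = 1:numImages
        for filterNum = 1:numFilters
            for r = 1:convolvedDim / poolDim
                for c = 1:convolvedDim / poolDim
                    region = convolvedFeatures((r-1)*poolDim+1 : r*poolDim, ...
                                               (c-1)*poolDim+1 : c*poolDim, filterNum, imageNum);
                    pooledFeatures(r, c, filterNum, imageNum) = mean(region(:));
                end
            end
        end
    end
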
6 changes: 3 additions & 3 deletions cnn/cnnTrain.m
@@ -15,8 +15,8 @@
% Configuration
imageDim = 28;
numClasses = 10; % Number of classes (MNIST images fall into 10 classes)
- filterDim = 9; % Filter size for conv layer (should divide imageDim)
- numFilters = 10; % Number of filters for conv layer
+ filterDim = 9; % Filter size for conv layer
+ numFilters = 20; % Number of filters for conv layer
poolDim = 2; % Pooling dimension, (should divide imageDim-filterDim+1)

% Load MNIST Train
@@ -39,7 +39,7 @@
% calculation for your cnnCost.m function. You may need to add the
% appropriate path or copy the file to this directory.

- DEBUG=true; % set this to true to check gradient
+ DEBUG=false; % set this to true to check gradient
if DEBUG
% To speed up gradient checking, we will use a reduced network and
% a debugging data set
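
The gradient check referred to above might look roughly like this; computeNumericalGradient is assumed to be the helper carried over from the earlier exercises, and the reduced debug variables are placeholders:

    % Hypothetical gradient check on a small debug set (all names assumed):
    db_images = images(:, :, 1:10);
    db_labels = labels(1:10);
    db_theta  = cnnInitParams(imageDim, filterDim, numFilters, poolDim, numClasses);

    costFn = @(t) cnnCost(t, db_images, db_labels, numClasses, ...
                          filterDim, numFilters, poolDim);
    [~, grad] = costFn(db_theta);
    numGrad    = computeNumericalGradient(costFn, db_theta);
    fprintf('Gradient check diff: %g (should be on the order of 1e-9)\n', ...
            norm(numGrad - grad) / norm(numGrad + grad));
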
23 changes: 12 additions & 11 deletions cnn/minFuncSGD.m
@@ -1,11 +1,12 @@
function [opttheta] = minFuncSGD(funObj,theta,data,labels,...
options)
- % Runs stochastic gradient descent with momentum to optimize the parameters
- % for the given objective.
+ % Runs stochastic gradient descent with momentum to optimize the
+ % parameters for the given objective.
%
% Parameters:
- % funObj - function handle which accepts as input theta, data, labels
- %          and returns cost and gradient w.r.t to theta.
+ % funObj - function handle which accepts as input theta,
+ %          data, labels and returns cost and gradient w.r.t
+ %          to theta.
% theta - unrolled parameter vector
% data - stores data in m x n x numExamples tensor
% labels - corresponding labels in numExamples x 1 vector
@@ -34,7 +35,7 @@
m = length(labels); % training set size
% Setup for momentum
mom = 0.5;
- momIncrease = 10;
+ momIncrease = 20;
velocity = zeros(size(theta));

%%======================================================================
@@ -47,7 +48,7 @@

for s=1:minibatch:(m-minibatch+1)
it = it + 1;

% increase momentum after momIncrease iterations
if it == momIncrease
mom = options.momentum;
@@ -60,10 +61,10 @@
% evaluate the objective function on the next minibatch
[cost grad] = funObj(theta,mb_data,mb_labels);

- % Instructions: Add in the weighted velocity vector to the gradient
- % evaluated above. Then update the current weights theta according
- % to the sgd update rule with alpha as the learning rate. Finally
- % update the velocity vector.
+ % Instructions: Add in the weighted velocity vector to the
+ % gradient evaluated above scaled by the learning rate.
+ % Then update the current weights theta according to the
+ % sgd update rule

%%% YOUR CODE HERE %%%

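A minimal sketch of the update these instructions describe, assuming alpha holds the current learning rate (set elsewhere in this function):

        % Hypothetical momentum update (alpha assumed to be the learning rate):
        velocity = mom * velocity + alpha * grad;   % fold the scaled gradient into the velocity
        theta    = theta - velocity;                % take the SGD step
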
@@ -77,4 +78,4 @@

opttheta = theta;

end
end
5 changes: 3 additions & 2 deletions rica/runSoftICA.m
@@ -24,10 +24,11 @@
% Step 1) Sample patches
patches = samplePatches(data,params.patchWidth,params.m);
% Step 2) Apply ZCA
- %%% YOUR CODE HERE %%%
+ patches = zca2(patches);
% Step 3) Normalize each patch. Each patch should be normalized as
% x / ||x||_2 where x is the vector representation of the patch
- %%% YOUR CODE HERE %%%
+ m = sqrt(sum(patches.^2) + (1e-8));
+ x = bsxfunwrap(@rdivide,patches,m);

%% Run the optimization
options.Method = 'lbfgs';
2 changes: 1 addition & 1 deletion stl/feedfowardRICA.m
@@ -58,7 +58,7 @@
% be sure to do a 'valid' convolution
% ---- YOUR CODE HERE ----
resp = conv2(im,filter,'valid');
-
+ % ------------------------
% Then, apply square-square-root pooling on "resp" to get the hidden
% activation "act"
act = zeros(convDim / poolDim, convDim / poolDim); % You should replace this
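
A sketch of the square-square-root pooling step described above, over non-overlapping poolDim x poolDim regions; the epsilon inside the square root is an assumed small constant:

        % Hypothetical square-square-root pooling (epsilon value assumed):
        epsilon = 1e-2;
        for pr = 1:convDim / poolDim
            for pc = 1:convDim / poolDim
                region = resp((pr-1)*poolDim+1 : pr*poolDim, (pc-1)*poolDim+1 : pc*poolDim);
                act(pr, pc) = sqrt(sum(region(:).^2) + epsilon);
            end
        end
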
3 changes: 2 additions & 1 deletion stl/stlExercise.m
@@ -131,11 +131,12 @@

% Use minFunc and softmax_regression_vec from the previous exercise to
% train a multi-class classifier.
- %% ----------------- YOUR CODE HERE ----------------------
options.Method = 'lbfgs';
options.MaxFunEvals = Inf;
options.MaxIter = 300;

+ % optimize
+ %% ----------------- YOUR CODE HERE ----------------------
[opttheta_softmax, cost, exitflag] = minFunc( @(theta) softmax_regression_vec(theta, trainFeatures, trainLabels), randTheta2, options);

%% -----------------------------------------------------
