diff --git a/cnn/cnnConvolve.m b/cnn/cnnConvolve.m index 0c793b7..45bbee2 100644 --- a/cnn/cnnConvolve.m +++ b/cnn/cnnConvolve.m @@ -41,7 +41,9 @@ % convolution of image with feature matrix convolvedImage = zeros(convDim, convDim); + % Obtain the feature (filterDim x filterDim) needed during the convolution + %%% YOUR CODE HERE %%% % Flip the feature matrix because of the definition of convolution, as explained later @@ -52,13 +54,13 @@ % Convolve "filter" with "im", adding the result to convolvedImage % be sure to do a 'valid' convolution - %%% YOUR CODE HERE %%% + %%% YOUR CODE HERE %%% % Add the bias unit % Then, apply the sigmoid function to get the hidden activation - %%% YOUR CODE HERE %%% + %%% YOUR CODE HERE %%% convolvedFeatures(:, :, filterNum, imageNum) = convolvedImage; diff --git a/cnn/cnnCost.m b/cnn/cnnCost.m index 1feebf8..d56af2f 100644 --- a/cnn/cnnCost.m +++ b/cnn/cnnCost.m @@ -1,16 +1,19 @@ function [cost, grad, preds] = cnnCost(theta,images,labels,numClasses,... filterDim,numFilters,poolDim,pred) -% Calcualte cost and gradient for a single layer convolutional neural -% network followed by a softmax layer with cross entropy objective. +% Calculate cost and gradient for a single layer convolutional +% neural network followed by a softmax layer with cross entropy +% objective. 
% % Parameters: % theta - unrolled parameter vector -% images - stores images in imageDim x imageDim x numImges array +% images - stores images in imageDim x imageDim x numImages +% array % numClasses - number of classes to predict % filterDim - dimension of convolutional filter % numFilters - number of convolutional filters % poolDim - dimension of pooling area -% pred - boolean only forward propagate and return predictions +% pred - boolean only forward propagate and return +% predictions % % % Returns: @@ -32,19 +35,18 @@ % Wc is filterDim x filterDim x numFilters parameter matrix % bc is the corresponding bias -% Wd is numClasses x hiddenSize parameter matrix where hiddenSize is the -% number of output units from the convolutional layer +% Wd is numClasses x hiddenSize parameter matrix where hiddenSize +% is the number of output units from the convolutional layer % bd is corresponding bias [Wc, Wd, bc, bd] = cnnParamsToStack(theta,imageDim,filterDim,numFilters,... poolDim,numClasses); -% Same sizes as Wc,Wd,bc,bd. Used to hold gradient w.r.t above params. +% Same sizes as Wc,Wd,bc,bd. Used to hold gradient w.r.t above params. Wc_grad = zeros(size(Wc)); Wd_grad = zeros(size(Wd)); bc_grad = zeros(size(bc)); bd_grad = zeros(size(bd)); - %%====================================================================== %% STEP 1a: Forward Propagation % In this step you will forward propagate the input through the @@ -71,7 +73,6 @@ %%% YOUR CODE HERE %%% - % Reshape activations into 2-d matrix, hiddenSize x numImages, % for Softmax layer activationsPooled = reshape(activationsPooled,[],numImages); @@ -88,7 +89,6 @@ %%% YOUR CODE HERE %%% - %%====================================================================== %% STEP 1b: Calculate Cost % In this step you will use the labels given as input and the probs @@ -99,7 +99,6 @@ %%% YOUR CODE HERE %%% - % Makes predictions given probs and returns without backproagating errors. 
if pred [~,preds] = max(probs,[],1); @@ -121,17 +120,16 @@ %%% YOUR CODE HERE %%% %%====================================================================== -%% STEP 1c: Gradient Calculation +%% STEP 1d: Gradient Calculation % After backpropagating the errors above, we can use them to calculate the % gradient with respect to all the parameters. The gradient w.r.t the % softmax layer is calculated as usual. To calculate the gradient w.r.t. % a filter in the convolutional layer, convolve the backpropagated error -% for that fileter with each image and aggregate over images. +% for that filter with each image and aggregate over images. %%% YOUR CODE HERE %%% - %% Unroll gradient into grad vector for minFunc grad = [Wc_grad(:) ; Wd_grad(:) ; bc_grad(:) ; bd_grad(:)]; -end \ No newline at end of file +end diff --git a/cnn/cnnInitParams.m b/cnn/cnnInitParams.m index 1ae7045..e38fd85 100644 --- a/cnn/cnnInitParams.m +++ b/cnn/cnnInitParams.m @@ -28,7 +28,8 @@ outDim = outDim/poolDim; hiddenSize = outDim^2*numFilters; -r = sqrt(6) / sqrt(numClasses+hiddenSize+1); % we'll choose weights uniformly from the interval [-r, r] +% we'll choose weights uniformly from the interval [-r, r] +r = sqrt(6) / sqrt(numClasses+hiddenSize+1); Wd = rand(numClasses, hiddenSize) * 2 * r - r; bc = zeros(numFilters, 1); diff --git a/cnn/cnnPool.m b/cnn/cnnPool.m index 9e039f7..c740915 100644 --- a/cnn/cnnPool.m +++ b/cnn/cnnPool.m @@ -31,6 +31,5 @@ %%% YOUR CODE HERE %%% - end diff --git a/cnn/cnnTrain.m b/cnn/cnnTrain.m index 5ac045a..fe034f3 100644 --- a/cnn/cnnTrain.m +++ b/cnn/cnnTrain.m @@ -15,8 +15,8 @@ % Configuration imageDim = 28; numClasses = 10; % Number of classes (MNIST images fall into 10 classes) -filterDim = 9; % Filter size for conv layer (should divide imageDim) -numFilters = 10; % Number of filters for conv layer +filterDim = 9; % Filter size for conv layer +numFilters = 20; % Number of filters for conv layer poolDim = 2; % Pooling dimension, (should divide imageDim-filterDim+1) 
% Load MNIST Train @@ -39,7 +39,7 @@ % calculation for your cnnCost.m function. You may need to add the % appropriate path or copy the file to this directory. -DEBUG=true; % set this to true to check gradient +DEBUG=false; % set this to true to check gradient if DEBUG % To speed up gradient checking, we will use a reduced network and % a debugging data set diff --git a/cnn/minFuncSGD.m b/cnn/minFuncSGD.m index b62d518..3571064 100644 --- a/cnn/minFuncSGD.m +++ b/cnn/minFuncSGD.m @@ -1,11 +1,12 @@ function [opttheta] = minFuncSGD(funObj,theta,data,labels,... options) -% Runs stochastic gradient descent with momentum to optimize the parameters -% for the given objective. +% Runs stochastic gradient descent with momentum to optimize the +% parameters for the given objective. % % Parameters: -% funObj - function handle which accepts as input theta, data, labels -% and returns cost and gradient w.r.t to theta. +% funObj - function handle which accepts as input theta, +% data, labels and returns cost and gradient w.r.t +% to theta. % theta - unrolled parameter vector % data - stores data in m x n x numExamples tensor % labels - corresponding labels in numExamples x 1 vector @@ -34,7 +35,7 @@ m = length(labels); % training set size % Setup for momentum mom = 0.5; -momIncrease = 10; +momIncrease = 20; velocity = zeros(size(theta)); %%====================================================================== @@ -47,7 +48,7 @@ for s=1:minibatch:(m-minibatch+1) it = it + 1; - + % increase momentum after momIncrease iterations if it == momIncrease mom = options.momentum; @@ -60,10 +61,10 @@ % evaluate the objective function on the next minibatch [cost grad] = funObj(theta,mb_data,mb_labels); - % Instructions: Add in the weighted velocity vector to the gradient - % evaluated above. Then update the current weights theta according - % to the sgd update rule with alpha as the learning rate. Finally - % update the velocity vector. 
+ % Instructions: Add in the weighted velocity vector to the + % gradient evaluated above scaled by the learning rate. + % Then update the current weights theta according to the + % sgd update rule %%% YOUR CODE HERE %%% @@ -77,4 +78,4 @@ opttheta = theta; -end \ No newline at end of file +end diff --git a/rica/runSoftICA.m b/rica/runSoftICA.m index f369ea6..c99ffbe 100644 --- a/rica/runSoftICA.m +++ b/rica/runSoftICA.m @@ -24,10 +24,11 @@ % Step 1) Sample patches patches = samplePatches(data,params.patchWidth,params.m); % Step 2) Apply ZCA -%%% YOUR CODE HERE %%% +patches = zca2(patches); % Step 3) Normalize each patch. Each patch should be normalized as % x / ||x||_2 where x is the vector representation of the patch -%%% YOUR CODE HERE %%% +m = sqrt(sum(patches.^2) + (1e-8)); +x = bsxfunwrap(@rdivide,patches,m); %% Run the optimization options.Method = 'lbfgs'; diff --git a/stl/feedfowardRICA.m b/stl/feedfowardRICA.m index 0f56d70..4c073b9 100644 --- a/stl/feedfowardRICA.m +++ b/stl/feedfowardRICA.m @@ -58,7 +58,7 @@ % be sure to do a 'valid' convolution % ---- YOUR CODE HERE ---- resp = conv2(im,filter,'valid'); - + % ------------------------ % Then, apply square-square-root pooling on "resp" to get the hidden % activation "act" act = zeros(convDim / poolDim, convDim / poolDim); % You should replace this diff --git a/stl/stlExercise.m b/stl/stlExercise.m index 1aefdd6..fb54c3d 100644 --- a/stl/stlExercise.m +++ b/stl/stlExercise.m @@ -131,11 +131,12 @@ % Use minFunc and softmax_regression_vec from the previous exercise to % train a multi-class classifier. -%% ----------------- YOUR CODE HERE ---------------------- options.Method = 'lbfgs'; options.MaxFunEvals = Inf; options.MaxIter = 300; + % optimize +%% ----------------- YOUR CODE HERE ---------------------- [opttheta_softmax, cost, exitflag] = minFunc( @(theta) softmax_regression_vec(theta, trainFeatures, trainLabels), randTheta2, options); %% -----------------------------------------------------