MATLAB ANN Engine
%%% Neural Network Engine Example
clearvars;
close all;
%%% 1. Declare total number of input neurons, hidden layer neurons and output
%%% neurons
input_neurons = 2;
hidden_neurons = [4 2]; % two hidden layers: 4 neurons in the first, 2 in the second
output_neurons = 1;
%%% 2. Set total number of iterations
N = 10000;
%%% 3. Create synthetic dataset
rng(123);
M = 100;
tol = 0.1;
x1 = bsxfun(@plus, rand(M,2) - 0.5, [-0.5-tol, 0.5+tol]); % Top-Left quadrant
x2 = bsxfun(@plus, rand(M,2) - 0.5, [0.5+tol, 0.5+tol]); % Top-Right quadrant
x3 = bsxfun(@plus, rand(M,2) - 0.5, [-0.5-tol, -0.5-tol]); % Bottom-Left quadrant
x4 = bsxfun(@plus, rand(M,2) - 0.5, [0.5+tol, -0.5-tol]); % Bottom-Right quadrant
% Create final dataset
X = [x1; x2; x3; x4];
Y = [-ones(M,1); ones(2*M,1); -ones(M,1)];
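% Labels: the top-left and bottom-right quadrants are -1, the top-right and
% bottom-left quadrants are +1. This XOR-style pattern is not linearly
% separable, hence the need for hidden layers.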
% Randomly sample and create training and test sets
per = 0.5;
m = numel(Y);
ind = randperm(m);
pt = floor(per*m);
Xtrain = X(ind(1:pt), :);
Ytrain = Y(ind(1:pt));
Xtest = X(ind(pt+1:end),:);
Ytest = Y(ind(pt+1:end));
mTrain = size(Xtrain,1);
%%% 4. Declare NN engine
NN = NeuralNet2([input_neurons, hidden_neurons, output_neurons]);
NN.LearningRate = 0.1;
NN.RegularizationType = 'L2';
NN.RegularizationRate = 0.01;
NN.ActivationFunction = 'Tanh';
NN.BatchSize = 10;
NN.Debug = true;
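% Mini-batch SGD with a step size of 0.1, an L2 weight penalty of 0.01,
% tanh activations in every layer, batches of 10 examples, and periodic
% cost printouts while training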
%%% 5. Find optimal weights
costVal = NN.train(Xtrain, Ytrain, N);
figure;
gscatter(Xtrain(:,1), Xtrain(:,2), Ytrain, [0 0.4470 0.7410; 0.8500 0.3250 0.0980], ...
'x');
hold on;
gscatter(Xtest(:,1), Xtest(:,2), Ytest, [0 0.4470 0.7410; 0.8500 0.3250 0.0980], ...
'.');
%%% 6. Compute predictions for training and testing data
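% The network's tanh outputs lie in (-1,1); thresholding at 0 maps each
% output onto the {-1,+1} class labels defined above.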
predicthTrain = 2*double(NN.sim(Xtrain) >= 0) - 1;
predicthTest = 2*double(NN.sim(Xtest) >= 0) - 1;
%%% 7. Compute classification accuracy for training and testing data
fprintf('Classification Accuracy for Training Data: %f\n', ...
100*sum(predicthTrain == Ytrain) / numel(Ytrain));
fprintf('Classification Accuracy for Testing Data: %f\n', ...
100*sum(predicthTest == Ytest) / numel(Ytest));
fprintf('The final cost at the optimal weights is: %f\n', costVal(end));
%%% 8. Plot the cost function per epoch
figure;
plot(1:N, costVal);
title('Cost function vs. epoch');
xlabel('Epoch');
ylabel('Cost Function');
grid;
%%% 9. Plot decision regions
[X1,X2] = meshgrid(linspace(-1.25,1.25,1000));
Xout = [X1(:) X2(:)];
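% Classify every grid point with the trained network so the learned
% decision regions can be visualized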
predictDes = 2*double(NN.sim(Xout) >= 0) - 1;
figure;
% Plot decision regions first
h = plot(Xout(predictDes==-1,1), Xout(predictDes==-1,2), '.', 'MarkerEdgeColor', ...
[0.9290 0.6940 0.1250]);
hold on;
h2 = plot(Xout(predictDes==1,1), Xout(predictDes==1,2), '.', 'MarkerEdgeColor', ...
[0.4940 0.1840 0.5560]);
% Now plot points
plot(Xtrain(Ytrain==-1,1), Xtrain(Ytrain==-1,2), '.', 'MarkerEdgeColor', ...
[0 0.4470 0.7410], 'MarkerSize', 12);
plot(Xtrain(Ytrain==1,1), Xtrain(Ytrain==1,2), '.', 'MarkerEdgeColor', ...
[0.8500 0.3250 0.0980], 'MarkerSize', 12);
plot(Xtest(Ytest==-1,1), Xtest(Ytest==-1,2), 'x', 'MarkerEdgeColor', ...
[0 0.4470 0.7410], 'MarkerSize', 12);
plot(Xtest(Ytest==1,1), Xtest(Ytest==1,2), 'x', 'MarkerEdgeColor', ...
[0.8500 0.3250 0.0980], 'MarkerSize', 12);
axis tight;
legend('Decision Region - Negative', 'Decision Region - Positive', ...
'Training Examples - Negative', 'Training Examples - Positive', ...
'Test Examples - Negative', 'Test Examples - Positive');
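%%% (Optional) Per-class breakdown of the test predictions
% A quick sanity check: for each true label, count how many test examples
% were predicted as -1 and as +1 (a 2x2 confusion-style summary)
for trueLabel = [-1 1]
for predLabel = [-1 1]
cnt = sum(Ytest == trueLabel & predicthTest == predLabel);
fprintf('True %+d predicted as %+d: %d\n', trueLabel, predLabel, cnt);
end
end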
classdef NeuralNet2 < handle
properties (Access = public)
LearningRate
ActivationFunction
RegularizationType
RegularizationRate
BatchSize
Debug
end
properties (Access = private)
inputSize
hiddenSizes
outputSize
weights
end
methods
% Class constructor
function this = NeuralNet2(layerSizes)
% default params
this.LearningRate = 0.003;
this.ActivationFunction = 'Tanh';
this.RegularizationType = 'None';
this.RegularizationRate = 0;
this.BatchSize = 10;
this.Debug = false;
% network structure (fully-connected feed-forward)
% Obtain input layer neuron size
this.inputSize = layerSizes(1);
% Obtain the hidden layer neuron sizes
this.hiddenSizes = layerSizes(2:end-1);
% Obtain the output layer neuron size
this.outputSize = layerSizes(end);
% Initialize matrices relating between the ith layer
% and (i+1)th layer
this.weights = cell(1,numel(layerSizes)-1);
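% Each weight matrix has (fan-in + 1) rows; the extra last row holds
% the bias weights feeding into the next layer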
for i=1:numel(layerSizes)-1
this.weights{i} = zeros(layerSizes(i)+1, layerSizes(i+1));
end
end
% Check that the dataset dimensions match the network architecture
function configure(this, X, Y)
% check correct sizes
[xrows,xcols] = size(X);
[yrows,ycols] = size(Y);
assert(xrows == yrows);
assert(xcols == this.inputSize);
assert(ycols == this.outputSize);
% min/max of inputs/outputs (computed but not currently used)
inMin = min(X);
inMax = max(X);
outMin = min(Y);
outMax = max(Y);
end
% Initialize neural network weights
function init(this)
% initialize with random weights
for i=1:numel(this.weights)
num = numel(this.weights{i});
this.weights{i}(:) = rand(num,1) - 0.5; % [-0.5,0.5]
end
end
% Perform training with Stochastic Gradient Descent
function perf = train(this, X, Y, numIter)
% Ensure correct sizes
assert(size(X,1) == size(Y,1))
% Ensure regularization rate and batch size is proper
assert(this.BatchSize >= 1);
assert(this.RegularizationRate >= 0);
% Check if we have specified the right regularization type
regType = this.RegularizationType;
assert(any(strcmpi(regType, {'l1', 'l2', 'none'})));
% Initialize cost function array
perf = zeros(1, numIter);
% Initialize weights
init(this);
% Total number of examples
N = size(X,1);
% Total number of applicable layers
L = numel(this.weights);
% Get batch size
B = this.BatchSize;
% Safely catch if batch size is larger than total number
% of examples
if B > N
B = N;
end
% Cell array to store input and outputs of each neuron
sNeuron = cell(1,L);
% The first entry holds the initial inputs (with the bias column appended)
xNeuron = cell(1,L+1);
% Cell array for storing the sensitivities
delta = cell(1,L);
% For L1 regularization
% Method used: http://aclweb.org/anthology/P/P09/P09-1054.pdf
if strcmpi(regType, 'l1')
% This represents the total L1 penalty that each
% weight could have received up to current point
uk = 0;
% Total penalty for each weight that was received up to
% current point
qk = cell(1,L);
for ii=1:L
qk{ii} = zeros(size(this.weights{ii}));
end
end
% Get activation function
fcn = getActivationFunction(this.ActivationFunction);
skipFactor = floor(numIter/10);
% For each iteration...
for ii = 1:numIter
% If the batch size is equal to the total number of examples
% don't bother with random selection as this will be a full
% batch gradient descent
if N == B
ind = 1 : N;
else
% Randomly select examples corresponding to the batch size
% if the batch size is not equal to the number of examples
ind = randperm(N, B);
end
% Select out the training example features and expected outputs
IN = X(ind, :);
OUT = Y(ind, :);
% Initialize input layer
xNeuron{1} = [IN ones(B,1)];
%%% Perform forward propagation
% Make sure you save the inputs and outputs into each neuron
% at the hidden and output layers
for jj = 1:L
% Compute inputs into next layer
sNeuron{jj} = xNeuron{jj} * this.weights{jj};
% Compute outputs of this layer
if jj == L
xNeuron{jj+1} = fcn(sNeuron{jj});
else
xNeuron{jj+1} = [fcn(sNeuron{jj}) ones(B,1)];
end
end
%%% Perform backpropagation
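% With the squared-error cost used below, the sensitivities follow the
% standard backpropagation recursion:
%   delta_L = (a_L - y) .* f'(s_L)                   (output layer)
%   delta_l = f'(s_l) .* (delta_{l+1} * W_{l+1}.')   (hidden layers, with the
%                                                     bias row of W removed)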
% Get derivative of activation function
dfcn = getDerivativeActivationFunction(this.ActivationFunction);
% Compute sensitivities for output layer
delta{end} = (xNeuron{end} - OUT) .* dfcn(sNeuron{end});
% Compute the sensitivities for the rest of the layers
for jj = L-1 : -1 : 1
delta{jj} = dfcn(sNeuron{jj}) .* ...
(delta{jj+1}*(this.weights{jj+1}(1:end-1,:)).');
end
%%% Compute weight updates
alpha = this.LearningRate;
lambda = this.RegularizationRate;
for jj = 1 : L
% Obtain the outputs and sensitivities for each
% affected layer
XX = xNeuron{jj};
D = delta{jj};
% Calculate batch weight update
weight_update = (1/B)*sum(bsxfun(@times, permute(XX, [2 3 1]), ...
permute(D, [3 2 1])), 3);
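% The permute/bsxfun expression averages the outer products x_i*d_i.'
% over the mini-batch; it is equivalent to (1/B)*(XX.'*D)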
% Apply L2 regularization if required
if strcmpi(regType, 'l2')
weight_update(1:end-1,:) = weight_update(1:end-1,:) + ...
(lambda/B)*this.weights{jj}(1:end-1,:);
end
% Compute the final update
this.weights{jj} = this.weights{jj} - alpha*weight_update;
end
% Apply L1 regularization if required
if strcmpi(regType, 'l1')
% Step #1 - Accumulate total L1 penalty that each
% weight could have received up to this point
uk = uk + (alpha*lambda/B);
% Step #2
% Using the updated weights, now apply the penalties
for jj = 1 : L
% 2a - Save previous weights and penalties
% Make sure to remove bias terms
z = this.weights{jj}(1:end-1,:);
q = qk{jj}(1:end-1,:);
% 2b - Using the previous weights, find the weights
% that are positive and negative
w = z;
indwp = w > 0;
indwn = w < 0;
% 2c - Apply the penalty to the positive and negative weights
% separately, clipping at zero so the penalty never flips a
% weight's sign
w(indwp) = max(0, w(indwp) - (uk + q(indwp)));
w(indwn) = min(0, w(indwn) + (uk - q(indwn)));
% 2d - Update the actual penalties
qk{jj}(1:end-1,:) = q + (w - z);
% Don't forget to update the actual weights!
this.weights{jj}(1:end-1,:) = w;
end
end
% Compute cost at this iteration
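% J = (1/(2B)) * sum over the mini-batch of ||a_L - y||^2
% (regularization terms are added below when enabled)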
perf(ii) = (0.5/B)*sum(sum((xNeuron{end} - OUT).^2));
% Add in regularization if necessary
if strcmpi(regType, 'l1')
for jj = 1 : L
perf(ii) = perf(ii) + ...
(lambda/B)*sum(sum(abs(this.weights{jj}(1:end-1,:))));
end
elseif strcmpi(regType, 'l2')
for jj = 1 : L
perf(ii) = perf(ii) + ...
(0.5*lambda/B)*sum(sum((this.weights{jj}(1:end-1,:)).^2));
end
end
% Debugging output
if this.Debug
if mod(ii,skipFactor) == 1 || ii == numIter
fprintf('Iteration #%d - Cost: %4.6e\n', ii, perf(ii));
end
end
end
end
% Perform forward propagation
% Note that the bias weights occupy the last row of each weight matrix
% Inputs are in a 2D matrix of N x M
% N is the number of examples
% M is the number of features / number of input neurons
function OUT = sim(this, X)
% Check if the total number of features matches the
% total number of input neurons
assert(size(X,2) == this.inputSize);
% Get total number of examples
N = size(X,1);
%%% Begin algorithm
% Start with first layer
IN = X;
% Get activation function
fcn = getActivationFunction(this.ActivationFunction);
% For each layer...
for ii=1:numel(this.weights)
% Compute inputs into each neuron and corresponding
% outputs
OUT = fcn([IN ones(N,1)] * this.weights{ii});
% Save for next iteration
IN = OUT;
end
end
end
end
function fcn = getActivationFunction(activation)
switch lower(activation)
case 'linear'
fcn = @f_linear;
case 'relu'
fcn = @f_relu;
case 'tanh'
fcn = @f_tanh;
case 'sigmoid'
fcn = @f_sigmoid;
otherwise
error('Unknown activation function');
end
end
function fcn = getDerivativeActivationFunction(activation)
switch lower(activation)
case 'linear'
fcn = @fd_linear;
case 'relu'
fcn = @fd_relu;
case 'tanh'
fcn = @fd_tanh;
case 'sigmoid'
fcn = @fd_sigmoid;
otherwise
error('Unknown activation function');
end
end
% Activation functions and their derivatives
function y = f_linear(x)
% See also: purelin
y = x;
end
function y = fd_linear(x)
% See also: dpurelin
y = ones(size(x));
end
function y = f_relu(x)
% See also: poslin
y = max(x, 0);
end
function y = fd_relu(x)
% See also: dposlin
y = double(x >= 0);
end
function y = f_tanh(x)
% See also: tansig
%y = 2 ./ (1 + exp(-2*x)) - 1;
y = tanh(x);
end
function y = fd_tanh(x)
% See also: dtansig
y = f_tanh(x);
y = 1 - y.^2;
end
function y = f_sigmoid(x)
% See also: logsig
y = 1 ./ (1 + exp(-x));
end
function y = fd_sigmoid(x)
% See also: dlogsig
y = f_sigmoid(x);
y = y .* (1 - y);
end