MATLAB ANN Engine
%%% Neural Network Engine Example
clearvars;
close all;

%%% 1. Declare total number of input neurons, hidden layer neurons and output
%%% neurons
input_neurons = 2;
hidden_neurons = [4 2];
output_neurons = 1;

%%% 2. Set total number of iterations
N = 10000;

%%% 3. Create synthetic dataset
rng(123);
M = 100;
tol = 0.1;
x1 = bsxfun(@plus, rand(M,2) - 0.5, [-0.5-tol, 0.5+tol]);  % Top-Left quadrant
x2 = bsxfun(@plus, rand(M,2) - 0.5, [0.5+tol, 0.5+tol]);   % Top-Right quadrant
x3 = bsxfun(@plus, rand(M,2) - 0.5, [-0.5-tol, -0.5-tol]); % Bottom-Left quadrant
x4 = bsxfun(@plus, rand(M,2) - 0.5, [0.5+tol, -0.5-tol]);  % Bottom-Right quadrant

% Create final dataset
X = [x1; x2; x3; x4];
Y = [-ones(M,1); ones(2*M,1); -ones(M,1)];
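% (Labeling note: the top-left and bottom-right clusters get class -1 while
% the top-right and bottom-left clusters get class +1, an XOR-style layout
% that no single linear boundary can separate; hence the hidden layers.)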
% Randomly sample and create training and test sets
per = 0.5;
m = numel(Y);
ind = randperm(m);
pt = floor(per*m);
Xtrain = X(ind(1:pt), :);
Ytrain = Y(ind(1:pt));
Xtest = X(ind(pt+1:end),:);
Ytest = Y(ind(pt+1:end));
mTrain = size(Xtrain,1);

%%% 4. Declare NN engine
NN = NeuralNet2([input_neurons, hidden_neurons, output_neurons]);
NN.LearningRate = 0.1;
NN.RegularizationType = 'L2';
NN.RegularizationRate = 0.01;
NN.ActivationFunction = 'Tanh';
NN.BatchSize = 10;
NN.Debug = true;
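% (Aside: with the 'Tanh' activation the network's outputs lie in (-1,1),
% which is why the labels above are coded as -1/+1 and why predictions are
% later thresholded at zero.)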
%%% 5. Find optimal weights
costVal = NN.train(Xtrain, Ytrain, N);

% Quick scatter of the split: training examples as crosses, test examples
% as dots
figure;
gscatter(Xtrain(:,1), Xtrain(:,2), Ytrain, [0 0.4470 0.7410; 0.8500 0.3250 0.0980], ...
    'x');
hold on;
gscatter(Xtest(:,1), Xtest(:,2), Ytest, [0 0.4470 0.7410; 0.8500 0.3250 0.0980], ...
    '.');

%%% 6. Compute predictions for training and testing data
% Threshold the raw network output at zero to obtain -1/+1 class labels
predictTrain = 2*double(NN.sim(Xtrain) >= 0) - 1;
predictTest = 2*double(NN.sim(Xtest) >= 0) - 1;

%%% 7. Compute classification accuracy for training and testing data
fprintf('Classification Accuracy for Training Data: %f\n', ...
    100*sum(predictTrain == Ytrain) / numel(Ytrain));
fprintf('Classification Accuracy for Testing Data: %f\n', ...
    100*sum(predictTest == Ytest) / numel(Ytest));
fprintf('The final cost for the optimal weights is: %f\n', costVal(end));
%%% 8. Plot the cost function per epoch
figure;
plot(1:N, costVal);
title('Cost function vs. epoch');
xlabel('Epoch');
ylabel('Cost Function');
grid;
%%% 9. Plot decision regions
% Use distinct names for the evaluation grid so the original dataset
% matrices X and Y are not overwritten
[Xgrid, Ygrid] = meshgrid(linspace(-1.25,1.25,1000));
Xout = [Xgrid(:) Ygrid(:)];
predictDes = 2*double(NN.sim(Xout) >= 0) - 1;
figure;
% Plot decision regions first
plot(Xout(predictDes==-1,1), Xout(predictDes==-1,2), '.', 'MarkerEdgeColor', ...
    [0.9290 0.6940 0.1250]);
hold on;
plot(Xout(predictDes==1,1), Xout(predictDes==1,2), '.', 'MarkerEdgeColor', ...
    [0.4940 0.1840 0.5560]);
% Now plot the points
plot(Xtrain(Ytrain==-1,1), Xtrain(Ytrain==-1,2), '.', 'MarkerEdgeColor', ...
    [0 0.4470 0.7410], 'MarkerSize', 12);
plot(Xtrain(Ytrain==1,1), Xtrain(Ytrain==1,2), '.', 'MarkerEdgeColor', ...
    [0.8500 0.3250 0.0980], 'MarkerSize', 12);
plot(Xtest(Ytest==-1,1), Xtest(Ytest==-1,2), 'x', 'MarkerEdgeColor', ...
    [0 0.4470 0.7410], 'MarkerSize', 12);
plot(Xtest(Ytest==1,1), Xtest(Ytest==1,2), 'x', 'MarkerEdgeColor', ...
    [0.8500 0.3250 0.0980], 'MarkerSize', 12);
axis tight;
legend('Decision Region - Negative', 'Decision Region - Positive', ...
    'Training Examples - Negative', 'Training Examples - Positive', ...
    'Test Examples - Negative', 'Test Examples - Positive');
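% A final sanity check (a minimal sketch, not part of the plots above):
% score a single hand-picked point with the trained network; anything deep
% in the top-right quadrant should come out positive.
score = NN.sim([0.75 0.75]);       % raw tanh output in (-1,1)
label = 2*double(score >= 0) - 1;  % threshold at zero => -1 or +1
fprintf('Score at (0.75, 0.75): %f (class %d)\n', score, label);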
NeuralNet2.m
classdef NeuralNet2 < handle
    properties (Access = public)
        LearningRate
        ActivationFunction
        RegularizationType
        RegularizationRate
        BatchSize
        Debug
    end

    properties (Access = private)
        inputSize
        hiddenSizes
        outputSize
        weights
    end

    methods
        % Class constructor
        function this = NeuralNet2(layerSizes)
            % default params
            this.LearningRate = 0.003;
            this.ActivationFunction = 'Tanh';
            this.RegularizationType = 'None';
            this.RegularizationRate = 0;
            this.BatchSize = 10;
            this.Debug = false;

            % network structure (fully-connected feed-forward)
            % Obtain input layer neuron size
            this.inputSize = layerSizes(1);
            % Obtain the hidden layer neuron sizes
            this.hiddenSizes = layerSizes(2:end-1);
            % Obtain the output layer neuron size
            this.outputSize = layerSizes(end);
            % Initialize the weight matrices relating the ith layer to the
            % (i+1)th layer; the extra row in each matrix holds the bias
            % weights
            this.weights = cell(1,numel(layerSizes)-1);
            for i=1:numel(layerSizes)-1
                this.weights{i} = zeros(layerSizes(i)+1, layerSizes(i+1));
            end
        end
        % Sanity-check that a dataset is compatible with the network
        % architecture (note: the computed input/output ranges are
        % currently unused)
        function configure(this, X, Y)
            % check correct sizes
            [xrows,xcols] = size(X);
            [yrows,ycols] = size(Y);
            assert(xrows == yrows);
            assert(xcols == this.inputSize);
            assert(ycols == this.outputSize);

            % min/max of inputs/outputs
            inMin = min(X);
            inMax = max(X);
            outMin = min(Y);
            outMax = max(Y);
        end
        % Initialize neural network weights
        function init(this)
            % initialize with random weights
            for i=1:numel(this.weights)
                num = numel(this.weights{i});
                this.weights{i}(:) = rand(num,1) - 0.5; % [-0.5,0.5]
            end
        end
        % Perform training with Stochastic Gradient Descent
        function perf = train(this, X, Y, numIter)
            % Ensure correct sizes
            assert(size(X,1) == size(Y,1))
            % Ensure the regularization rate and batch size are valid
            assert(this.BatchSize >= 1);
            assert(this.RegularizationRate >= 0);
            % Check if we have specified the right regularization type
            regType = this.RegularizationType;
            assert(any(strcmpi(regType, {'l1', 'l2', 'none'})));

            % Initialize cost function array
            perf = zeros(1, numIter);
            % Initialize weights
            init(this);
            % Total number of examples
            N = size(X,1);
            % Total number of applicable layers
            L = numel(this.weights);
            % Get batch size
            B = this.BatchSize;
            % Safely catch if batch size is larger than total number
            % of examples
            if B > N
                B = N;
            end

            % Cell array to store the inputs into each layer's neurons
            sNeuron = cell(1,L);
            % Cell array to store the outputs of each layer; the first
            % entry holds the network input (plus the bias column)
            xNeuron = cell(1,L+1);
            % Cell array for storing the sensitivities
            delta = cell(1,L);

            % For L1 regularization
            % Method used: http://aclweb.org/anthology/P/P09/P09-1054.pdf
            if strcmpi(regType, 'l1')
                % This represents the total L1 penalty that each
                % weight could have received up to current point
                uk = 0;
                % Total penalty for each weight that was received up to
                % current point
                qk = cell(1,L);
                for ii=1:L
                    qk{ii} = zeros(size(this.weights{ii}));
                end
            end
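            % (Sketch of the cumulative-penalty rule from the paper above:
            % uk accumulates the maximum total L1 penalty alpha*lambda/B
            % that any weight could have received so far, qk tracks the
            % penalty each weight has actually absorbed, and after every
            % plain SGD step each weight is pulled toward zero by the
            % outstanding difference, clamped so it never crosses zero.)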
            % Get activation function
            fcn = getActivationFunction(this.ActivationFunction);
            % Print roughly 10 progress updates when debugging
            skipFactor = floor(numIter/10);
            % For each iteration...
            for ii = 1:numIter
                % If the batch size is equal to the total number of examples,
                % don't bother with random selection as this will be a full
                % batch gradient descent
                if N == B
                    ind = 1 : N;
                else
                    % Randomly select examples corresponding to the batch size
                    % if the batch size is not equal to the number of examples
                    ind = randperm(N, B);
                end

                % Select out the training example features and expected outputs
                IN = X(ind, :);
                OUT = Y(ind, :);

                % Initialize input layer (append a column of ones to account
                % for the bias term)
                xNeuron{1} = [IN ones(B,1)];

                %%% Perform forward propagation
                % Make sure you save the inputs and outputs into each neuron
                % at the hidden and output layers
                for jj = 1:L
                    % Compute inputs into next layer
                    sNeuron{jj} = xNeuron{jj} * this.weights{jj};
                    % Compute outputs of this layer
                    if jj == L
                        xNeuron{jj+1} = fcn(sNeuron{jj});
                    else
                        xNeuron{jj+1} = [fcn(sNeuron{jj}) ones(B,1)];
                    end
                end
                %%% Perform backpropagation
                % Get derivative of activation function
                dfcn = getDerivativeActivationFunction(this.ActivationFunction);
                % Compute sensitivities for output layer
                delta{end} = (xNeuron{end} - OUT) .* dfcn(sNeuron{end});
                % Compute the sensitivities for the rest of the layers
                for jj = L-1 : -1 : 1
                    delta{jj} = dfcn(sNeuron{jj}) .* ...
                        (delta{jj+1}*(this.weights{jj+1}(1:end-1,:)).');
                end
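                % (In equation form, for the squared-error cost used here:
                %   delta_L = (xhat - y) .* f'(s_L)
                %   delta_j = f'(s_j) .* (delta_{j+1} * W_{j+1}')
                % where the bias row of W_{j+1} is dropped before the
                % product, since the constant bias input of layer j+1 does
                % not depend on layer j.)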
                %%% Compute weight updates
                alpha = this.LearningRate;
                lambda = this.RegularizationRate;
                for jj = 1 : L
                    % Obtain the outputs and sensitivities for each
                    % affected layer
                    XX = xNeuron{jj};
                    D = delta{jj};
                    % Calculate batch weight update
                    weight_update = (1/B)*sum(bsxfun(@times, permute(XX, [2 3 1]), ...
                        permute(D, [3 2 1])), 3);
                    % Apply L2 regularization if required
                    if strcmpi(regType, 'l2')
                        weight_update(1:end-1,:) = weight_update(1:end-1,:) + ...
                            (lambda/B)*this.weights{jj}(1:end-1,:);
                    end
                    % Compute the final update
                    this.weights{jj} = this.weights{jj} - alpha*weight_update;
                end
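                % (Note: the bsxfun/permute expression above is an explicit
                % per-example sum of outer products; it is numerically
                % equivalent to the more compact (1/B)*(XX.' * D).)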
                % Apply L1 regularization if required
                if strcmpi(regType, 'l1')
                    % Step #1 - Accumulate total L1 penalty that each
                    % weight could have received up to this point
                    uk = uk + (alpha*lambda/B);

                    % Step #2
                    % Using the updated weights, now apply the penalties
                    for jj = 1 : L
                        % 2a - Save previous weights and penalties
                        % Make sure to remove bias terms
                        z = this.weights{jj}(1:end-1,:);
                        q = qk{jj}(1:end-1,:);
                        % 2b - Using the previous weights, find the weights
                        % that are positive and negative
                        w = z;
                        indwp = w > 0;
                        indwn = w < 0;
                        % 2c - Perform the update on each condition
                        % individually
                        w(indwp) = max(0, w(indwp) - (uk + q(indwp)));
                        w(indwn) = min(0, w(indwn) + (uk - q(indwn)));
                        % 2d - Update the actual penalties
                        qk{jj}(1:end-1,:) = q + (w - z);
                        % Don't forget to update the actual weights!
                        this.weights{jj}(1:end-1,:) = w;
                    end
                end
                % Compute cost at this iteration
                perf(ii) = (0.5/B)*sum(sum((xNeuron{end} - OUT).^2));

                % Add in regularization if necessary
                if strcmpi(regType, 'l1')
                    for jj = 1 : L
                        perf(ii) = perf(ii) + ...
                            (lambda/B)*sum(sum(abs(this.weights{jj}(1:end-1,:))));
                    end
                elseif strcmpi(regType, 'l2')
                    for jj = 1 : L
                        perf(ii) = perf(ii) + ...
                            (0.5*lambda/B)*sum(sum((this.weights{jj}(1:end-1,:)).^2));
                    end
                end

                % Debugging output
                if this.Debug
                    if mod(ii,skipFactor) == 1 || ii == numIter
                        fprintf('Iteration #%d - Cost: %4.6e\n', ii, perf(ii));
                    end
                end
            end
        end
        % Perform forward propagation
        % Note that the bias weights occupy the last row of each weight matrix
        % Inputs are in a 2D matrix of N x M
        % N is the number of examples
        % M is the number of features / number of input neurons
        function OUT = sim(this, X)
            % Check if the total number of features matches the
            % total number of input neurons
            assert(size(X,2) == this.inputSize);
            % Get total number of examples
            N = size(X,1);

            %%% Begin algorithm
            % Start with first layer
            IN = X;
            % Get activation function
            fcn = getActivationFunction(this.ActivationFunction);
            % For each layer...
            for ii=1:numel(this.weights)
                % Compute inputs into each neuron and corresponding
                % outputs (append a column of ones for the bias)
                OUT = fcn([IN ones(N,1)] * this.weights{ii});
                % Save for next iteration
                IN = OUT;
            end
        end
    end
end
function fcn = getActivationFunction(activation)
    switch lower(activation)
        case 'linear'
            fcn = @f_linear;
        case 'relu'
            fcn = @f_relu;
        case 'tanh'
            fcn = @f_tanh;
        case 'sigmoid'
            fcn = @f_sigmoid;
        otherwise
            error('Unknown activation function');
    end
end

function fcn = getDerivativeActivationFunction(activation)
    switch lower(activation)
        case 'linear'
            fcn = @fd_linear;
        case 'relu'
            fcn = @fd_relu;
        case 'tanh'
            fcn = @fd_tanh;
        case 'sigmoid'
            fcn = @fd_sigmoid;
        otherwise
            error('Unknown activation function');
    end
end
% activation functions and their derivatives
function y = f_linear(x)
    % See also: purelin
    y = x;
end

function y = fd_linear(x)
    % See also: dpurelin
    y = ones(size(x));
end

function y = f_relu(x)
    % See also: poslin
    y = max(x, 0);
end

function y = fd_relu(x)
    % See also: dposlin
    y = double(x >= 0);
end

function y = f_tanh(x)
    % See also: tansig
    %y = 2 ./ (1 + exp(-2*x)) - 1;
    y = tanh(x);
end

function y = fd_tanh(x)
    % See also: dtansig
    y = f_tanh(x);
    y = 1 - y.^2;
end

function y = f_sigmoid(x)
    % See also: logsig
    y = 1 ./ (1 + exp(-x));
end

function y = fd_sigmoid(x)
    % See also: dlogsig
    y = f_sigmoid(x);
    y = y .* (1 - y);
end
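% Minimal usage sketch (assumes this file is saved as NeuralNet2.m on the
% MATLAB path; X is an N-x-2 feature matrix, Y an N-x-1 label vector in
% {-1, +1}):
%   net = NeuralNet2([2 4 1]);     % 2 inputs, one hidden layer of 4, 1 output
%   net.LearningRate = 0.05;
%   cost = net.train(X, Y, 5000);  % mini-batch SGD for 5000 iterations
%   yhat = 2*double(net.sim(X) >= 0) - 1;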