Batch and Stochastic Backpropagation implemented in Julia for XOR problem
# - Batch and Stochastic Backpropagation Implementation in Julia based on the book "Pattern Classification" by Richard Duda
# - Neural network with 1 hidden layer applied to XOR classification problem
# Copyright (C) 2015 Eric Aislan Antonelo
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
using Gadfly
using Distributions

d = 2    # input dimension
n_H = 2  # number of hidden units
c = 2    # number of classes
n_O = c == 2 ? 1 : c  # number of output units (one unit suffices for 2 classes)

## hidden layer weights, drawn uniformly from [-1/sqrt(d), 1/sqrt(d)]
w_ = 1 / sqrt(d)
#w_ = 0.1
dist = Uniform(-w_, +w_)
w_H = rand(dist, (d + 1, n_H))  # hidden unit weights, (d+1) x n_H including the bias row

## output layer weights, drawn uniformly from [-1/sqrt(n_H), 1/sqrt(n_H)]
w_ = 1 / sqrt(n_H)
#w_ = 0.1
dist = Uniform(-w_, +w_)
w_O = rand(dist, (n_H + 1, n_O))  # output unit weights, (n_H+1) x n_O including the bias row

# XOR dataset: targets are +1 for the XOR-true patterns, -1 otherwise
X = ([-1, 1], [1, -1], [1, 1], [0, 0])
T = (1, 1, -1, -1)
N = length(X)
function sigmoid(net)
    # scaled tanh activation f(net) = a*tanh(b*net), as in Duda et al.
    a, b = 1.716, 2/3.0
    a * tanh(b * net)
end

function sigmoid_first_derivative(net)
    # f'(net) = a*b*sech(b*net)^2 = a*b*(1 - tanh(b*net)^2)
    a, b = 1.716, 2/3.0
    a * b * (1 - tanh(b * net)^2)
end
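# Sanity check (a sketch, not part of the original gist): compare the analytic
# derivative against a central finite difference at an arbitrary test point.
let net = 0.3, h = 1e-6
    numeric = (sigmoid(net + h) - sigmoid(net - h)) / (2h)
    println("f'(", net, ") analytic: ", sigmoid_first_derivative(net),
            "  numeric: ", numeric)
end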
function forward_propagation(x, w_O, w_H)
    net_H = transpose(w_H) * [x; 1]    # augment the input with a bias term
    y_H = sigmoid.(net_H)
    net_O = transpose(w_O) * [y_H; 1]  # augment the hidden output with a bias term
    (net_H, y_H), (net_O, sigmoid.(net_O))
end
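# Usage example (sketch): a single forward pass on the first XOR pattern with
# the untrained weights; z0 is a length-1 vector because n_O == 1.
(net_H0, y_H0), (net_O0, z0) = forward_propagation(X[1], w_O, w_H)
println("initial output for ", X[1], ": ", z0)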
function train(x, t, w_O, w_H)
    learning_rate = 0.4
    (net_H, y), (net_O, z) = forward_propagation(x, w_O, w_H)
    # output-layer sensitivity: delta_O = (t - z) * f'(net_O)
    delta_O = (t .- z) .* sigmoid_first_derivative.(net_O)
    dw_O = learning_rate * [y; 1] * transpose(delta_O)  # (n_H+1) x n_O
    # hidden-layer sensitivity, backpropagated through the output weights
    delta_H = sigmoid_first_derivative.(net_H) .* (w_O[1:n_H, :] * delta_O)
    dw_H = learning_rate * [x; 1] * transpose(delta_H)  # (d+1) x n_H
    dw_O, dw_H
end
function J(w_O, w_H, X, T)
    # mean squared error over the whole training set
    err = 0.0
    for m in 1:N
        x, t = X[m], T[m]
        (net_H, y), (net_O, z) = forward_propagation(x, w_O, w_H)
        err += sum((t .- z).^2)  # sum over all output units
    end
    err / (2N)
end
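# Gradient check (sketch, not in the original gist): train() returns
# learning_rate * (negative gradient) of the per-example error 0.5*(t - z)^2,
# so dividing its output by the hard-coded learning_rate 0.4 should match a
# central finite difference on a single weight, up to sign.
let h = 1e-5, lr = 0.4
    dw_O_, dw_H_ = train(X[1], T[1], w_O, w_H)
    function per_example_error(w)
        (_, (_, z)) = forward_propagation(X[1], w, w_H)
        0.5 * sum((T[1] .- z).^2)
    end
    w_plus = copy(w_O);  w_plus[1, 1] += h
    w_minus = copy(w_O); w_minus[1, 1] -= h
    numeric = (per_example_error(w_plus) - per_example_error(w_minus)) / (2h)
    println("analytic: ", dw_O_[1, 1] / lr, "  numeric (negated): ", -numeric)
end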
function batch_backprop(w_O, w_H)
    epochs = 100
    err = zeros(epochs)
    for r in 1:epochs
        dw_O = zeros(size(w_O))
        dw_H = zeros(size(w_H))
        for m in 1:N  # accumulate the weight updates over all N training examples
            x, t = X[m], T[m]
            dw_O_, dw_H_ = train(x, t, w_O, w_H)
            dw_O += dw_O_
            dw_H += dw_H_
        end
        # apply the averaged update once per epoch
        w_O += dw_O / N
        w_H += dw_H / N
        err[r] = J(w_O, w_H, X, T)
        println(transpose(w_O))
        println(err[r])
    end
    err, w_O, w_H
end
function stochastic_backprop(w_O, w_H, alfa=0.9)
    iterations = 1000
    err = zeros(iterations)
    dw_O = zeros(size(w_O))
    dw_H = zeros(size(w_H))
    for m in 1:iterations
        i = rand(1:N)  # choose a random training example
        x, t = X[i], T[i]
        dw_O_, dw_H_ = train(x, t, w_O, w_H)
        # momentum: exponential moving average of the per-example updates
        dw_O = dw_O_ * (1 - alfa) + alfa * dw_O
        dw_H = dw_H_ * (1 - alfa) + alfa * dw_H
        w_O += dw_O
        w_H += dw_H
        err[m] = J(w_O, w_H, X, T)
        println(transpose(w_O))
        println(err[m])
    end
    err, w_O, w_H
end
err, w_O, w_H = batch_backprop(w_O, w_H)
#err, w_O, w_H = stochastic_backprop(w_O, w_H, 0.5)
plot(x=1:length(err), y=err)
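# Optional (sketch): persist the learning curve to disk. SVG output works
# without extra rendering backends; the file name is arbitrary.
p = plot(x=1:length(err), y=err, Geom.line,
         Guide.xlabel("epoch"), Guide.ylabel("J"))
draw(SVG("xor_learning_curve.svg", 6inch, 4inch), p)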
# print the network output for each training pattern
for m in 1:N
    x, t = X[m], T[m]
    (net_H, y), (net_O, z) = forward_propagation(x, w_O, w_H)
    println(x, ": ", z)
end
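# Classification summary (sketch): with targets in {-1, +1} and a single
# output unit, sign(z[1]) serves as the predicted class label.
predicted_sign(x) = sign(forward_propagation(x, w_O, w_H)[2][2][1])
correct = count(m -> predicted_sign(X[m]) == sign(T[m]), 1:N)
println("correctly classified: ", correct, " / ", N)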
# inspect the final weights and an example bias-augmented input vector
w_O
w_H
[X[1]; 1]