Batch and Stochastic Backpropagation implemented in Julia for XOR problem
# - Batch and Stochastic Backpropagation Implementation in Julia based on the book "Pattern Classification" by Richard Duda
# - Neural network with 1 hidden layer applied to XOR classification problem
# Copyright (C) 2015 Eric Aislan Antonelo
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
using Gadfly
using Distributions
d = 2 # input dimension
n_H = 2 # number of hidden units
c = 2 # number of classes
n_O = c == 2 ? 1 : c # number of output units (a single output suffices for two classes)
##
w_ = 1/sqrt(d) # initial weight range: uniform over [-1/sqrt(fan-in), +1/sqrt(fan-in)]
#w_ = 0.1
dist = Uniform(-w_, +w_)
w_H = rand(dist, (d + 1, n_H)) # hidden units weights
##
w_ = 1/sqrt(n_H)
#w_ = 0.1
dist = Uniform(-w_, +w_)
w_O = rand(dist, (n_H + 1, n_O)) # output units weights
# dataset: XOR-like problem; the target is +1 when the two inputs differ, -1 when they are equal
X = ([-1, 1], [1, -1], [1, 1], [0, 0])
T = (1, 1, -1, -1)
N = length(X)
function sigmoid(net)
    a, b = 1.716, 2/3.0
    a * tanh(b * net)
end
function sigmoid_first_derivative(net)
    # derivative of a*tanh(b*net) with respect to net: a*b*sech(b*net)^2
    a, b = 1.716, 2/3.0
    a * b / cosh(b * net)^2
end
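# Added sanity check (not in the original gist): compare the analytic derivative above
# against a central finite difference. The sample point 0.5 and the step size 1e-6 are
# arbitrary choices made for this illustration.
let net0 = 0.5, h = 1e-6
    fd = (sigmoid(net0 + h) - sigmoid(net0 - h)) / (2h)
    @assert isapprox(sigmoid_first_derivative(net0), fd; atol=1e-6)
end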
function forward_propagation(x, w_O, w_H)
    net_H = transpose(w_H) * [x; 1]   # hidden-layer pre-activations (bias input appended)
    y_H = sigmoid.(net_H)
    net_O = transpose(w_O) * [y_H; 1] # output-layer pre-activations
    (net_H, y_H), (net_O, sigmoid.(net_O))
end
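# Illustrative usage (added): one forward pass through the still-untrained network for the
# first training example; z0 has length 1 because n_O == 1 in the two-class case.
(_, y_H0), (_, z0) = forward_propagation(X[1], w_O, w_H)
println("initial output for ", X[1], ": ", z0)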
function train(x, t, w_O, w_H)
    learning_rate = 0.4
    (net_H, y), (net_O, z) = forward_propagation(x, w_O, w_H)
    delta_O = (t .- z) .* map(sigmoid_first_derivative, net_O)
    dw_O = learning_rate * [y; 1] * transpose(delta_O)   # (n_H + 1) x n_O
    delta_H = map(sigmoid_first_derivative, net_H) .* (w_O[1:n_H, :] * delta_O)
    dw_H = learning_rate * [x; 1] * transpose(delta_H)   # (d + 1) x n_H
    # the weight updates are returned to the caller; the batch and stochastic loops apply them
    dw_O, dw_H
end
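# Note (added): the updates computed above are the backpropagation delta rule from the
# book cited in the header, with augmented inputs carrying the bias term:
#   delta_O = (t - z) * f'(net_O)
#   dw_O    = eta * [y; 1] * delta_O'
#   delta_H = f'(net_H) .* (w_O[1:n_H, :] * delta_O)
#   dw_H    = eta * [x; 1] * delta_H'
# where eta is the learning rate and f is the sigmoid defined above.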
function J(w_O, w_H, X, T)
    err = 0.0
    for m in 1:N
        x = X[m]
        t = T[m]
        (net_H, y), (net_O, z) = forward_propagation(x, w_O, w_H)
        err += sum((t .- z).^2) # sum over all output units (classes)
    end
    err / (2.0 * N)
end
function batch_backprop(w_O, w_H)
    epochs = 100
    err = zeros(epochs)
    for r in 1:epochs
        dw_O = zeros(size(w_O))
        dw_H = zeros(size(w_H))
        for m in 1:N # accumulate updates over all N training examples
            x, t = X[m], T[m]
            dw_O_, dw_H_ = train(x, t, w_O, w_H)
            dw_O += dw_O_
            dw_H += dw_H_
        end
        w_O += dw_O / N # apply the averaged batch update
        w_H += dw_H / N
        err[r] = J(w_O, w_H, X, T)
        println(transpose(w_O))
        println(err[r])
    end
    err, w_O, w_H
end
function stochastic_backprop(w_O, w_H, alfa=0.9)
    iterations = 1000
    err = zeros(iterations)
    dw_O = zeros(size(w_O))
    dw_H = zeros(size(w_H))
    for m in 1:iterations
        i = rand(1:N) # choose a random training example
        x, t = X[i], T[i]
        dw_O_, dw_H_ = train(x, t, w_O, w_H)
        dw_O = dw_O_ * (1 - alfa) + alfa * dw_O # exponential smoothing of the updates (momentum-like)
        dw_H = dw_H_ * (1 - alfa) + alfa * dw_H
        w_O += dw_O
        w_H += dw_H
        err[m] = J(w_O, w_H, X, T)
        println(transpose(w_O))
        println(err[m])
    end
    err, w_O, w_H
end
err, w_O, w_H = batch_backprop(w_O, w_H)
#err, w_O, w_H = stochastic_backprop(w_O, w_H, 0.5)
plot(x=1:size(err,1), y=err)
# err = zeros(N,1)
# for m in 1:N
# x = X[m]
# t = T[m]
# w_O, w_H = train(x, t, w_O, w_H)
# err[m] = J(X,T)
# print(transpose(w_O))
# print("\n")
# end
for m in 1:N # print the network output for each training example
    x = X[m]
    t = T[m]
    (net_H, y), (net_O, z) = forward_propagation(x, w_O, w_H)
    print(x)
    print(": ")
    println(z)
end
println(w_O) # final output-layer weights
println(w_H) # final hidden-layer weights
println([X[1]; 1]) # first training example with the bias term appended