Backpropagation Algorithm
## Demo
## Generate data from a fixed 1-1-1 network (one input, one hidden unit,
## one output unit), fit a fresh network to the first 150 samples with
## back-propagation, and predict the remaining 50.
set.seed(12321)
W_all_test1 = list(matrix(rnorm(1, 0, 0.5), nc = 1), matrix(10, nc = 1))
b_all_test1 = list(matrix(rnorm(1, 0, 0.5), nc = 1), matrix(-5, nc = 1))
X = matrix(rnorm(200, 0, 2), ncol = 1)
Y = apply(X, 1, function(x){forward_propagation(x, W_all_test1, b_all_test1)})
X.train = X[1:150, ]
Y.train = Y[1:150]
X.test = X[151:200, ]
Y.test = Y[151:200]
#plot(X, Y)
## Fit with net_str = c(1, 1), learning rate alpha = 10, no weight decay.
model = back_propagation(matrix(X.train, nc = 1), Y.train, c(1, 1), 10, 0)
Y.pred = apply(matrix(X.test, nc = 1), 1, function(x){forward_propagation(x, model$W, model$b)})
plot(Y.test, Y.pred)
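## A quick added sanity check (not in the original demo): the test-set
## mean squared error should be close to zero once training has converged.
print(mean((Y.test - Y.pred)^2))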
## The following functions should be loaded first.
## http://ufldl.stanford.edu/wiki/index.php/Backpropagation_Algorithm
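## The objective minimized below (following the UFLDL notes linked above) is
##   J(W, b) = (1/m) * sum_i (1/2) * ||h_{W,b}(x_i) - y_i||^2
##             + (lambda/2) * sum of squared weights,
## optimized by batch gradient descent with the sigmoid activation
## f(z) = 1/(1 + exp(-z)).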
## Sigmoid activation function.
act_f <- function(x){
  x = as.numeric(x)
  return(1/(1 + exp(-x)))
}
## Derivative of the sigmoid: f'(x) = f(x) * (1 - f(x)).
act_f_der <- function(x){
  x = as.numeric(x)
  x1 = act_f(x)
  return(x1*(1-x1))
}
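## A quick numerical sanity check of act_f_der against a central finite
## difference (a small added sketch; the test point 0.3 is arbitrary):
eps = 1e-6
stopifnot(abs(act_f_der(0.3) - (act_f(0.3 + eps) - act_f(0.3 - eps))/(2*eps)) < 1e-6)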
## Pre-activation for one layer: z = W a + b.
forward_z <- function(a, W, b){
  a = as.numeric(a)
  return(W %*% a + b)
}
## Layer activation: a = f(z).
forward_a <- function(z){
  return(act_f(z))
}
## Forward pass: propagate each row of x through all layers, returning the
## output-layer activations (one row per sample).
forward_propagation <- function(x, W_all, b_all){
  n = length(W_all)
  if(!is.array(x))
    x = matrix(x, nr = 1)
  y = matrix(0, nrow(x), nrow(W_all[[length(W_all)]]))
  for(j in 1:nrow(x)){
    a = as.numeric(x[j, ])
    for(i in 1:n){
      z = forward_z(a, W_all[[i]], b_all[[i]])
      a = forward_a(z)
    }
    y[j, ] = a
  }
  return(y)
}
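## A small usage sketch (not part of the original gist; weights chosen
## arbitrarily): two inputs, two hidden units, one output unit, evaluated
## on two samples at once.
W_demo = list(matrix(c(0.1, -0.2, 0.4, 0.3), nrow = 2), matrix(c(0.5, -0.5), nrow = 1))
b_demo = list(matrix(c(0, 0), nc = 1), matrix(0.1, nc = 1))
forward_propagation(matrix(c(1, 2, -1, 0.5), nrow = 2), W_demo, b_demo)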
## Batch gradient descent with back-propagation.
## X: m x p design matrix; Y: target matrix (or vector); net_str: number of
## units in each layer after the input (hidden layers, then the output
## layer); alpha: learning rate; lambda: weight-decay coefficient.
## Optional ... = (beta, rho) switches on the sparsity penalty.
back_propagation <- function(X, Y, net_str, alpha, lambda, ...){
  if(!is.array(Y)){
    Y = matrix(Y, nc = 1)
  }
  m = dim(X)[1]
  p = dim(X)[2]
  n = length(as.numeric(net_str))
  W = list()
  b = list()
  args = list(...)
  if(length(args) == 2){
    beta = args[[1]]
    rho = args[[2]]
    sparse = TRUE
  }else{
    sparse = FALSE
  }
  ## Initialize weights and biases with small Gaussian noise.
  set.seed(216)
  for(i in 1:n){
    W = c(W, list(matrix(rnorm(net_str[i] * p, 0, 0.5), ncol = p)))
    b = c(b, list(matrix(rnorm(net_str[i], 0, 0.5), ncol = 1)))
    p = net_str[i]
  }
  W_new = W
  b_new = b
  err = 1
  k = 1
  while(err[k] > 0.00001){
    ## Start each pass from the weight-decay term lambda * W.
    W_gradient = lapply(W, "*", lambda)
    b_gradient = lapply(b, "*", 0)
    ## If there exists a sparsity constraint on the hidden units, scan
    ## through the samples, computing a forward pass on each to accumulate
    ## (sum up) the activations; their per-unit mean is rho_real.
    if(sparse == TRUE){
      rho_real = lapply(W, function(x){matrix(numeric(nrow(x)), nc = 1)})
      for(i in 1:m){
        activation_step = X[i, ]
        for(j in 1:n){
          activation_step = forward_propagation(activation_step, W[j], b[j])
          rho_real[[j]] = rho_real[[j]] + matrix(activation_step, nc = 1)
        }
      }
      rho_real = lapply(rho_real, "/", m)
    }
    for(i in 1:m){
      ## Forward pass, keeping every layer's activations for the backward pass.
      activation_all = list(matrix(X[i, ], nc = 1))
      activation_step = X[i, ]
      delta = lapply(W, function(x){numeric(nrow(x))})
      for(j in 1:n){
        activation_step = forward_propagation(activation_step, W[j], b[j])
        activation_all = c(activation_all, list(matrix(activation_step, nc = 1)))
      }
      ## Output-layer error term: delta = -(y - a) * f'(z), with
      ## f'(z) = a * (1 - a) for the sigmoid.
      delta[[n]] = -(Y[i, ] - activation_all[[n+1]]) * activation_all[[n+1]] * (1 - activation_all[[n+1]])
      W_gradient[[n]] = W_gradient[[n]] + as.matrix(delta[[n]], nc = 1) %*% t(activation_all[[n]]) / m
      b_gradient[[n]] = b_gradient[[n]] + as.matrix(delta[[n]], nc = 1) / m
      ## Propagate the error terms backwards through the hidden layers,
      ## adding the sparsity-penalty gradient when it is enabled.
      for(j in n:2){
        delta[[j-1]] = as.numeric(t(W[[j]]) %*% delta[[j]] + ifelse(rep(!sparse, net_str[j-1]), 0, beta*(-rho/rho_real[[j-1]] + (1-rho)/(1-rho_real[[j-1]])))) * activation_all[[j]] * (1 - activation_all[[j]])
        W_gradient[[j-1]] = W_gradient[[j-1]] + as.matrix(delta[[j-1]], nc = 1) %*% t(activation_all[[j-1]]) / m
        b_gradient[[j-1]] = b_gradient[[j-1]] + as.matrix(delta[[j-1]], nc = 1) / m
      }
    }
    ## Gradient-descent update of all layers.
    for(i in 1:n){
      W_new[[i]] = W[[i]] - alpha * W_gradient[[i]]
      b_new[[i]] = b[[i]] - alpha * b_gradient[[i]]
    }
    W = W_new
    b = b_new
    k = k + 1
    ## Stop once the squared step size drops below the threshold.
    err = c(err, sum(c(unlist(W_gradient)^2, unlist(b_gradient)^2)) * alpha^2)
    print(c(k, err[k]))
  }
  return(list(W = W, b = b, k = k, err = err))
}
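## The optional arguments in ... enable the sparsity penalty from the UFLDL
## notes: the first is beta (penalty weight), the second rho (target mean
## activation of the hidden units). A hypothetical call on the demo data,
## with illustrative values beta = 3 and rho = 0.05 (not from the gist):
#model_sparse = back_propagation(matrix(X.train, nc = 1), Y.train, c(3, 1), 10, 0.0001, 3, 0.05)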