@bquast
Last active October 13, 2016 17:08
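## Toy LSTM network in plain base R (no packages required), trained
## bit-by-bit to add two 10-bit integers.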
set.seed(1)
# define some functions
## convert an integer to a binary vector (least-significant bit first)
i2b <- function(integer, length=8)
  as.numeric(intToBits(integer))[1:length]

## vectorised version: one row of bits per integer
int2bin <- function(integer, length=8)
  t(sapply(integer, i2b, length=length))

## sigmoid function
sigmoid <- function(x, k=1, x0=0)
  1 / (1 + exp(-k * (x - x0)))

## derivative of the sigmoid, expressed in terms of its output
sigmoid_output_to_derivative <- function(x)
  x * (1 - x)

## derivative of tanh, expressed in terms of its output
tanh_output_to_derivative <- function(x)
  1 - x^2
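## quick sanity check of the helpers (illustrative only; safe to delete):
## intToBits() returns bits least-significant first, so
##   int2bin(c(3, 5), length=4)
## gives the rows 1 1 0 0 and 1 0 1 0 -- the training loop below relies
## on this ordering to walk the numbers from the low-order bit, the same
## direction as addition with carry.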
# create training numbers
X1 = sample(0:1023, 100000, replace=TRUE)
X2 = sample(0:1023, 100000, replace=TRUE)
# create training response numbers
Y <- X1 + X2
# convert to binary
X1b <- int2bin(X1, length=10)
X2b <- int2bin(X2, length=10)
Yb <- int2bin(Y, length=10)
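## note: X1 + X2 can reach 2046, which needs 11 bits; with length=10 the
## final carry bit is dropped, so the network effectively learns addition
## modulo 2^10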
# hyperparameters
alpha = 0.1             # learning rate
alpha_decay = 0.999     # learning-rate decay applied after each batch
momentum = 0.1          # fraction of each update carried into the next batch
init_weight = 1         # weights drawn uniformly from [-init_weight, init_weight]
batch_size = 20         # sequences per weight update
input_dim = 2           # two input bits per time step (one from each addend)
hidden_dim = 8          # LSTM hidden units
output_dim = 1          # one output bit per time step
binary_dim = 10         # bits per number
largest_number = 2^binary_dim   # (not used further down)
output_size = 100       # print progress every output_size sequences
# initialise neural network weights, uniform in [-init_weight, init_weight]
## synapse_0_* : input  -> hidden,  synapse_h_* : hidden -> hidden (recurrent)
## synapse_1   : hidden -> output,  synapse_b_* : bias vectors
## gate suffixes: i = input gate, f = forget gate, o = output gate, c = cell candidate
synapse_0_i = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_0_f = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_0_o = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_0_c = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_i = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_f = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_o = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_c = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_b_1 = runif(n = output_dim, min=-init_weight, max=init_weight)
synapse_b_i = runif(n = hidden_dim, min=-init_weight, max=init_weight)
synapse_b_f = runif(n = hidden_dim, min=-init_weight, max=init_weight)
synapse_b_o = runif(n = hidden_dim, min=-init_weight, max=init_weight)
synapse_b_c = runif(n = hidden_dim, min=-init_weight, max=init_weight)
# initialise synapse update accumulators (updates are batched before being applied)
synapse_0_i_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_0_f_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_0_o_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_0_c_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
synapse_h_i_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_f_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_o_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_c_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_b_1_update = rep(0, output_dim)
synapse_b_i_update = rep(0, hidden_dim)
synapse_b_f_update = rep(0, hidden_dim)
synapse_b_o_update = rep(0, hidden_dim)
synapse_b_c_update = rep(0, hidden_dim)
# training loop: one pass over the generated examples
for (j in 1:length(X1)) {
  # select input bit vectors
  a = X1b[j,]
  b = X2b[j,]
  # response bit vector
  c = Yb[j,]
  # where we'll store our best guess (binary encoded)
  d = matrix(0, nrow = 1, ncol = binary_dim)
  overallError = 0
  layer_2_deltas = matrix(0)
  layer_1_values = matrix(0, nrow = 1, ncol = hidden_dim)
  # initialise the cell state
  c_t_m1 = matrix(0, nrow = 1, ncol = hidden_dim)
  # move along the positions in the binary encoding, least-significant bit first
  for (position in 1:binary_dim) {
    # current input bits and target bit
    X = cbind(a[position], b[position])
    y = c[position]
    # hidden layer: LSTM cell combining the input and the previous hidden state
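    ## one LSTM time step, in the usual notation:
    ##   i_t  = sigmoid(X W_xi + h_{t-1} W_hi + b_i)   (input gate)
    ##   f_t  = sigmoid(X W_xf + h_{t-1} W_hf + b_f)   (forget gate)
    ##   o_t  = sigmoid(X W_xo + h_{t-1} W_ho + b_o)   (output gate)
    ##   c~_t = tanh(X W_xc + h_{t-1} W_hc + b_c)      (candidate cell state)
    ##   c_t  = f_t * c_{t-1} + i_t * c~_t
    ##   h_t  = o_t * tanh(c_t)
    ## mapping to the names here: synapse_0_* = W_x*, synapse_h_* = W_h*,
    ## synapse_b_* = b_*, layer_1 = h_t, c_in_t = c~_t, c_t_m1 holds the c_t history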
    i_t    = sigmoid((X %*% synapse_0_i) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_i) + synapse_b_i)
    f_t    = sigmoid((X %*% synapse_0_f) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_f) + synapse_b_f)
    o_t    = sigmoid((X %*% synapse_0_o) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_o) + synapse_b_o)
    c_in_t = tanh((X %*% synapse_0_c) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_c) + synapse_b_c)
    c_t = (f_t * c_t_m1[dim(c_t_m1)[1],]) + (i_t * c_in_t)
    layer_1 = o_t * tanh(c_t)
    # remember the cell state for the next time step
    c_t_m1 = rbind(c_t_m1, c_t)
    # output layer (one predicted bit of the sum)
    layer_2 = sigmoid(layer_1 %*% synapse_1 + synapse_b_1)
    # did we miss?... if so, by how much?
    layer_2_error = y - layer_2
    layer_2_deltas = rbind(layer_2_deltas, layer_2_error * sigmoid_output_to_derivative(layer_2))
    overallError = overallError + round(abs(layer_2_error))
    # store the estimated bit so we can print the full guess later
    d[position] = round(layer_2)
    # store the hidden layer so the next step (and backprop) can use it
    layer_1_values = rbind(layer_1_values, layer_1)
  }
  future_layer_1_i_delta = matrix(0, nrow = 1, ncol = hidden_dim)
  future_layer_1_f_delta = matrix(0, nrow = 1, ncol = hidden_dim)
  future_layer_1_o_delta = matrix(0, nrow = 1, ncol = hidden_dim)
  future_layer_1_c_delta = matrix(0, nrow = 1, ncol = hidden_dim)
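  ## backward pass: walk the bit positions in reverse, propagating the output
  ## error back through time and accumulating weight updates (applied in
  ## batches below) rather than changing the weights immediately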
  for (position in 1:binary_dim) {
    X = cbind(a[binary_dim - (position - 1)], b[binary_dim - (position - 1)])
    layer_1 = layer_1_values[dim(layer_1_values)[1] - (position - 1),]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1] - position,]
    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1] - (position - 1),]
    # error at hidden layer
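    ## NB: these gate deltas are a rough approximation -- each gate reuses the
    ## same output error, and the derivatives are applied to the hidden state
    ## layer_1 rather than to the stored gate activations; this is likely
    ## related to the NaN behaviour reported in the comments below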
    layer_1_i_delta = (future_layer_1_i_delta %*% t(synapse_h_i) + layer_2_delta %*% t(synapse_1)) *
      sigmoid_output_to_derivative(tanh_output_to_derivative(layer_1))
    layer_1_f_delta = (future_layer_1_f_delta %*% t(synapse_h_f) + layer_2_delta %*% t(synapse_1)) *
      sigmoid_output_to_derivative(tanh_output_to_derivative(layer_1))
    layer_1_o_delta = (future_layer_1_o_delta %*% t(synapse_h_o) + layer_2_delta %*% t(synapse_1)) *
      sigmoid_output_to_derivative(layer_1)
    layer_1_c_delta = (future_layer_1_c_delta %*% t(synapse_h_c) + layer_2_delta %*% t(synapse_1)) *
      tanh_output_to_derivative(tanh_output_to_derivative(layer_1))
    # accumulate the weight updates (applied once per batch, not here)
    synapse_1_update   = synapse_1_update   + matrix(layer_1) %*% layer_2_delta
    synapse_h_i_update = synapse_h_i_update + matrix(prev_layer_1) %*% layer_1_i_delta
    synapse_h_f_update = synapse_h_f_update + matrix(prev_layer_1) %*% layer_1_f_delta
    synapse_h_o_update = synapse_h_o_update + matrix(prev_layer_1) %*% layer_1_o_delta
    synapse_h_c_update = synapse_h_c_update + matrix(prev_layer_1) %*% layer_1_c_delta
    synapse_0_i_update = synapse_0_i_update + t(X) %*% layer_1_i_delta
    synapse_0_f_update = synapse_0_f_update + t(X) %*% layer_1_f_delta
    synapse_0_o_update = synapse_0_o_update + t(X) %*% layer_1_o_delta
    synapse_0_c_update = synapse_0_c_update + t(X) %*% layer_1_c_delta
    synapse_b_1_update = synapse_b_1_update + layer_2_delta
    synapse_b_i_update = synapse_b_i_update + layer_1_i_delta
    synapse_b_f_update = synapse_b_f_update + layer_1_f_delta
    synapse_b_o_update = synapse_b_o_update + layer_1_o_delta
    synapse_b_c_update = synapse_b_c_update + layer_1_c_delta
    # pass the deltas on to the next (earlier) time step
    future_layer_1_i_delta = layer_1_i_delta
    future_layer_1_f_delta = layer_1_f_delta
    future_layer_1_o_delta = layer_1_o_delta
    future_layer_1_c_delta = layer_1_c_delta
  }
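  ## every batch_size sequences, apply the accumulated updates scaled by the
  ## learning rate, decay the learning rate, and retain a `momentum` fraction
  ## of each accumulator as the seed for the next batch (a simple momentum scheme)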
  if (j %% batch_size == 0) {
    synapse_0_i = synapse_0_i + (synapse_0_i_update * alpha)
    synapse_0_f = synapse_0_f + (synapse_0_f_update * alpha)
    synapse_0_o = synapse_0_o + (synapse_0_o_update * alpha)
    synapse_0_c = synapse_0_c + (synapse_0_c_update * alpha)
    synapse_1   = synapse_1   + (synapse_1_update   * alpha)
    synapse_h_i = synapse_h_i + (synapse_h_i_update * alpha)
    synapse_h_f = synapse_h_f + (synapse_h_f_update * alpha)
    synapse_h_o = synapse_h_o + (synapse_h_o_update * alpha)
    synapse_h_c = synapse_h_c + (synapse_h_c_update * alpha)
    synapse_b_1 = synapse_b_1 + (synapse_b_1_update * alpha)
    synapse_b_i = synapse_b_i + (synapse_b_i_update * alpha)
    synapse_b_f = synapse_b_f + (synapse_b_f_update * alpha)
    synapse_b_o = synapse_b_o + (synapse_b_o_update * alpha)
    synapse_b_c = synapse_b_c + (synapse_b_c_update * alpha)
    alpha = alpha * alpha_decay
    synapse_0_i_update = synapse_0_i_update * momentum
    synapse_0_f_update = synapse_0_f_update * momentum
    synapse_0_o_update = synapse_0_o_update * momentum
    synapse_0_c_update = synapse_0_c_update * momentum
    synapse_1_update   = synapse_1_update   * momentum
    synapse_h_i_update = synapse_h_i_update * momentum
    synapse_h_f_update = synapse_h_f_update * momentum
    synapse_h_o_update = synapse_h_o_update * momentum
    synapse_h_c_update = synapse_h_c_update * momentum
    synapse_b_1_update = synapse_b_1_update * momentum
    synapse_b_i_update = synapse_b_i_update * momentum
    synapse_b_f_update = synapse_b_f_update * momentum
    synapse_b_o_update = synapse_b_o_update * momentum
    synapse_b_c_update = synapse_b_c_update * momentum
  }
  # print out progress
  if (j %% output_size == 0) {
    print(paste("Error:", overallError, " - alpha:", alpha))
    print(paste("A   :", paste(a, collapse = " ")))
    print(paste("B   :", paste(b, collapse = " ")))
    print(paste("Pred:", paste(d, collapse = " ")))
    print(paste("True:", paste(c, collapse = " ")))
    # decode the predicted bits (stored least-significant first) into an integer
    out = sum(d * 2^(0:(binary_dim - 1)))
    print(paste("Pred as integer:", out))
    print("----------------")
  }
}
@m-r-munroe commented Oct 13, 2016

Pred transitions to "NaN" pretty early when I copy/paste this and run it in R.

[1] "Error: 6  - alpha: 0.0895784832903913"
[1] "A   : 1 0 1 1 1 0 1 0 0 0"
[1] "B   : 0 1 0 0 0 1 1 1 1 1"
[1] "Pred: 0 0 0 0 0 0 0 0 0 0"
[1] "True: 1 1 1 1 1 1 0 0 0 0"
[1] "----------------"
[1] "Error: 5  - alpha: 0.0891314857634352"
[1] "A   : 1 1 0 1 1 1 0 1 1 1"
[1] "B   : 1 0 0 1 1 1 0 0 1 0"
[1] "Pred: 1 1 1 1 1 1 1 1 1 1"
[1] "True: 0 0 1 0 1 1 1 1 0 0"
[1] "----------------"
[1] "Error: NaN  - alpha: 0.0886867187586064"
[1] "A   : 0 0 1 1 0 1 0 1 0 0"
[1] "B   : 0 0 1 0 0 0 1 1 0 0"
[1] "Pred: NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN"
[1] "True: 0 0 0 0 1 1 1 0 1 0"
[1] "----------------"

I tried re-installing the rnn package, and got this:

> devtools::install_github('bquast/rnn')
Downloading GitHub repo bquast/rnn@master
from URL https://api.github.com/repos/bquast/rnn/zipball/master
Installing rnn
"C:/PROGRA~1/MICROS~4/MRO-33~1.1/bin/x64/R" --no-site-file --no-environ --no-save --no-restore --quiet  \
  CMD INSTALL "C:/Users/mrmunroe/AppData/Local/Temp/Rtmp2rgRru/devtools3a243e47770e/bquast-rnn-51341bc"  \
  --library="C:/Program Files/Microsoft/MRO-3.3.1/library" --install-tests 

* installing *source* package 'rnn' ...
** R
** inst
** tests
** preparing package for lazy loading
Error in loadNamespace(i, c(lib.loc, .libPaths()), versionCheck = vI[[i]]) : 
  there is no package called 'httpuv'
ERROR: lazy loading failed for package 'rnn'
* removing 'C:/Program Files/Microsoft/MRO-3.3.1/library/rnn'
Error: Command failed (1)

I am trying again with httpuv installed.
