# Last active December 17, 2019 20:54
# Times table prediction using neuralnet & caret in R
# This file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Packages used by this script:
#   caret        - createDataPartition(), trainControl(), train()
#   ModelMetrics - mse()
#   recipes      - recipe(), step_center(), step_scale(), %>%
#   neuralnet    - model engine behind method = "neuralnet"
#   sigmoid      - sigmoid() used in the manual forward pass
library(caret)
library(ModelMetrics)
library(recipes)
library(neuralnet)
library(sigmoid)
# Create the dataset
# The full 10 x 10 times table: one row per (multiplier, multiplicand) pair
# with multiplier varying fastest, plus their product as the outcome column.
tt <- data.frame(
  multiplier = rep(1:10, times = 10),
  multiplicand = rep(1:10, each = 10)
)
tt$product <- tt$multiplier * tt$multiplicand
# Splitting
# Draw a single partition on the outcome with p = 0.7: the selected rows become
# the training set and the remaining rows the test set. The seed makes the
# partition reproducible.
set.seed(1234)
indexes <- createDataPartition(
  tt$product,
  times = 1,
  p = 0.7,
  list = FALSE
)
tt.train <- tt[indexes, ]
tt.test <- tt[-indexes, ]
# Pre-process
# Earlier caret::preProcess() variant, kept for reference:
# preProc <- preProcess(tt, method = c('center', 'scale'))
# tt.preProcessed <- predict(preProc, tt)
# tt.preProcessed.train <- tt.preProcessed[indexes,]
# tt.preProcessed.test <- tt.preProcessed[-indexes,]
# Recipe to pre-process our data: center and scale the predictors, then center
# and scale the outcome.
# Keep the steps in this order: the rest of the script reads the outcome mean
# from steps[[3]] and the outcome standard deviation from steps[[4]] to map
# predictions back to the product scale.
rec_reg <- recipe(product ~ ., data = tt.train) %>%
  step_center(all_predictors()) %>%
  step_scale(all_predictors()) %>%
  step_center(all_outcomes()) %>%
  step_scale(all_outcomes())
# Train
# rec_reg centers and scales the outcome, so the network is fitted to (and
# predicts) standardized products; the script multiplies by the sd and adds the
# mean afterwards to get back to real products. Numeric predictionBounds are
# compared against those standardized predictions, so the valid product range
# [1, 100] has to be expressed on the standardized scale as well. Passing
# c(1, 100) directly would clamp every standardized prediction below 1 up to 1.
outcome_bounds <- (c(1, 100) - mean(tt.train$product)) / sd(tt.train$product)
# 10-fold cross-validation repeated 3 times, keeping the hold-out predictions.
train.control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  savePredictions = TRUE,
  predictionBounds = outcome_bounds
)
# Single candidate architecture: 8 units in the first hidden layer, no second
# or third hidden layer.
tune.grid <- expand.grid(
  layer1 = 8,
  layer2 = 0,
  layer3 = 0
)
# Setting seed for reproducibility
set.seed(12)
# algorithm, learningrate and lifesign are not train() arguments; they are
# passed on to the underlying neuralnet fit.
tt.cv <- train(
  rec_reg,
  data = tt.train,
  method = "neuralnet",
  tuneGrid = tune.grid,
  trControl = train.control,
  algorithm = "backprop",
  learningrate = 0.005,
  lifesign = "minimal"
)
# Predict the held-out rows and undo the outcome standardization: multiply by
# the outcome sd (recipe step 4, step_scale on the outcome) and add the outcome
# mean (recipe step 3, step_center on the outcome).
predict(tt.cv, tt.test) * tt.cv$recipe$steps[[4]]$sds +
  tt.cv$recipe$steps[[3]]$means
# One hand-computed backpropagation step on the first training sample,
# starting from the start weights of the fitted network.
#
# Shapes (I = 2 inputs, H = 8 hidden units):
#   first_layer_weights   (I + 1) x H   row 1 holds the bias weights
#   second_layer_weights  (H + 1) x 1   row 1 holds the bias weight
first_layer_weights <- as.data.frame(tt.cv$finalModel$startweights[[1]][1])
second_layer_weights <- as.data.frame(
  tt.cv$finalModel$startweights[[1]][2][[1]]
)
learning_rate <- 0.005

# First row of the data stored in the fitted model: columns 1:2 are the
# inputs, column 3 is the target.
input <- tt.cv$finalModel$data[1, 1:2]
output <- tt.cv$finalModel$data[1, 3]

# ---- Forward pass ----
input_w_bias <- matrix(c(1, as.matrix(input)), 3, 1) # First one is for the bias
hidden_layer_net <- t(first_layer_weights) %*% as.matrix(input_w_bias)
hidden_layer_activated <- sigmoid(hidden_layer_net)
hidden_layer_activated_w_bias <- rbind(1, hidden_layer_activated)
# No activation is applied to the output unit: it is linear.
output_predicted <- t(second_layer_weights) %*% hidden_layer_activated_w_bias

# Squared error of this single sample. It is not divided by any data-set size,
# so that derror_dout below is exactly its derivative.
error <- (output - output_predicted)^2

# ---- Backward pass: hidden -> output weights ----
# derror/dw2 = derror/dout * dout/dw2
derror_dout <- 2 * (output_predicted - output)
# dout/dw2 = the (bias-augmented) hidden activations feeding each weight
derror_dweight <- c(derror_dout) * hidden_layer_activated_w_bias
# Keep the pre-update weights: the hidden-layer gradient must be computed with
# the weights that produced output_predicted, not the updated ones.
old_second_layer_weights <- second_layer_weights
second_layer_weights <- second_layer_weights - (learning_rate * derror_dweight)

# ---- Backward pass: input -> hidden weights ----
# derror/dw1 = derror/dh_act * dh_act/dh_net * dh_net/dw1
# derror/dh_act = derror/dout * dout/dh_act, and dout/dh_act is the
# hidden -> output weight of each unit (bias row dropped, the bias has no
# incoming connection from the first layer).
derror_dh_act <- c(derror_dout) *
  as.matrix(old_second_layer_weights)[-1, , drop = FALSE]
# dh_act/dh_net = sig(a) * (1 - sig(a))
dh_act_dh_net <- hidden_layer_activated * (1 - hidden_layer_activated)
# Error signal of each hidden unit (H x 1)
derror_dh_net <- derror_dh_act * dh_act_dh_net
# dh_net/dw1 = the (bias-augmented) input feeding each weight
dh_net_dw1 <- input_w_bias
# Outer product gives one gradient entry per weight, laid out like
# first_layer_weights: entry [i, j] = input i * error signal of hidden unit j.
derror_dw1 <- dh_net_dw1 %*% t(derror_dh_net)
first_layer_weights <- first_layer_weights - (learning_rate * derror_dw1)
# Compare test-set error before and after the manual weight update.
# Note: this overwrites the weights stored in tt.cv$finalModel, so from here on
# tt.cv no longer predicts with the weights found by training.
# The metric is the mean squared error (mse()), on the original product scale.

# Predict with the start weights and then calculate the MSE
tt.cv$finalModel$weights <- tt.cv$finalModel$startweights
prediction_w_start_w <- predict(tt.cv, tt.test) *
  tt.cv$recipe$steps[[4]]$sds + tt.cv$recipe$steps[[3]]$means
# Arguments in (actual, predicted) order; the squared error is symmetric, so
# the value is the same either way.
mse(tt.test$product, prediction_w_start_w)

# Use the adjusted weights to predict and calculate the MSE again
tt.cv$finalModel$weights[[1]][[2]] <- as.matrix(second_layer_weights)
tt.cv$finalModel$weights[[1]][[1]] <- as.matrix(first_layer_weights)
prediction_w_updated_w <- predict(tt.cv, tt.test) *
  tt.cv$recipe$steps[[4]]$sds + tt.cv$recipe$steps[[3]]$means
mse(tt.test$product, prediction_w_updated_w)

# Scratch, kept for reference:
# w1 = cbind(c(1, 1, 1), tt.cv$finalModel$startweights[[1]][1][[1]])
# w2 = rbind(1, tt.cv$finalModel$startweights[[1]][2][[1]])
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment