Last active
December 17, 2019 20:54
-
-
Save andreyuhai/f299282f5a827e2a27c586afc9eb4eb5 to your computer and use it in GitHub Desktop.
Times table prediction using neuralnet & caret in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(caret) | |
library(ModelMetrics) | |
library(recipes) | |
library(neuralnet) | |
library(sigmoid) | |
# Build the 10 x 10 times table: one row per (multiplier, multiplicand) pair,
# with the multiplier varying fastest, plus the product as the outcome column.
tt <- expand.grid(
  multiplier = 1:10,
  multiplicand = 1:10,
  KEEP.OUT.ATTRS = FALSE
)
tt$product <- tt$multiplier * tt$multiplicand
# Train/test split: createDataPartition() is asked for a single partition
# holding 70% of the rows, sampled with respect to `product`; the remaining
# rows form the test set. The seed makes the split reproducible.
set.seed(1234)
indexes <- createDataPartition(
  y = tt$product,
  times = 1,
  p = 0.7,
  list = FALSE
)

tt.train <- tt[indexes, ]
tt.test <- tt[-indexes, ]
# Pre-process | |
# preProc <- preProcess(tt, method = c('center', 'scale')) | |
# tt.preProcessed <- predict(preProc, tt) | |
# tt.preProcessed.train <- tt.preProcessed[indexes,] | |
# tt.preProcessed.test <- tt.preProcessed[-indexes,] | |
# Pre-processing recipe, built one step at a time.
# Step order matters: later code reads the outcome mean from steps[[3]] and
# the outcome standard deviation from steps[[4]].
rec_reg <- recipe(product ~ ., data = tt.train)
rec_reg <- step_center(rec_reg, all_predictors()) # step 1
rec_reg <- step_scale(rec_reg, all_predictors())  # step 2
rec_reg <- step_center(rec_reg, all_outcomes())   # step 3
rec_reg <- step_scale(rec_reg, all_outcomes())    # step 4
# Train
# Resampling setup: 10-fold cross-validation repeated 3 times, keeping the
# hold-out predictions.
#
# predictionBounds are applied by caret to the model's raw predictions. The
# recipe centers and scales the outcome, so those predictions live on the
# standardized scale; bounds given as c(1, 100) in product units would clamp
# every standardized prediction to be >= 1. The valid product range [1, 100]
# is therefore converted to the standardized scale using the training mean
# and standard deviation (the same statistics step_center()/step_scale()
# estimate from tt.train).
train.control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  savePredictions = TRUE,
  predictionBounds =
    (c(1, 100) - mean(tt.train$product)) / sd(tt.train$product)
)

# Single candidate architecture: one hidden layer with 8 units
# (layer2 = layer3 = 0 means no further hidden layers are requested).
tune.grid <- expand.grid(
  layer1 = 8,
  layer2 = 0,
  layer3 = 0
)
# Fit the network through caret; the seed is fixed so resampling and the
# initial weights are reproducible. Arguments after trControl are forwarded
# to the underlying neuralnet fit.
set.seed(12)
tt.cv <- train(
  rec_reg,
  data = tt.train,
  method = "neuralnet",
  tuneGrid = tune.grid,
  trControl = train.control,
  algorithm = "backprop",
  learningrate = 0.005,
  lifesign = "minimal"
)

# Test-set predictions come back on the standardized outcome scale; multiply
# by the outcome sd (recipe step 4) and add the outcome mean (recipe step 3)
# to express them as products again.
predict(tt.cv, newdata = tt.test) * tt.cv$recipe$steps[[4]]$sds +
  tt.cv$recipe$steps[[3]]$means
# Manual backpropagation: one gradient-descent step on a single training row,
# starting from the network's initial weights.
#
# Both weight sets are extracted as plain matrices with `[[ ]]`:
#   first_layer_weights  : (inputs + 1) x hidden
#   second_layer_weights : (hidden + 1) x 1
# NOTE(review): assumes neuralnet stores the bias weight in the first row of
# each matrix, consistent with the 1 prepended to the activations below.
first_layer_weights <- tt.cv$finalModel$startweights[[1]][[1]]
second_layer_weights <- tt.cv$finalModel$startweights[[1]][[2]]

# Same value as the `learningrate` passed to train().
learning_rate <- 0.005

# First row of the data stored in the fitted model: columns 1:2 are used as
# the inputs and column 3 as the target.
input <- tt.cv$finalModel$data[1, 1:2]
output <- tt.cv$finalModel$data[1, 3]

# ---- Forward pass ----
# Leading 1 multiplies the bias weight.
input_w_bias <- matrix(c(1, as.matrix(input)), ncol = 1)

hidden_layer_net <- t(first_layer_weights) %*% input_w_bias # hidden x 1
hidden_layer_activated <- sigmoid(hidden_layer_net)
hidden_layer_activated_w_bias <- rbind(1, hidden_layer_activated)

# Linear output unit: no activation applied after the weighted sum.
output_predicted <- t(second_layer_weights) %*% hidden_layer_activated_w_bias

# Squared error of this single sample (the loss differentiated below).
error <- (output - output_predicted)^2

# ---- Backward pass: output layer ----
# derror/dw2 = derror/dout * dout/dw2, with dout/dw2 = hidden activations
# (including the bias input).
derror_dout <- 2 * (output_predicted - output)
derror_dweight <- c(derror_dout) * hidden_layer_activated_w_bias

# Keep the pre-update weights: the hidden-layer gradient must be computed
# with the weights that produced `output_predicted`.
old_second_layer_weights <- second_layer_weights
second_layer_weights <- second_layer_weights - learning_rate * derror_dweight

# ---- Backward pass: hidden layer ----
# derror/dw1 = derror/dh_act * dh_act/dh_net * dh_net/dw1

# derror/dh_act = derror/dout * dout/dh_act, where dout/dh_act is the
# second-layer weight attached to each hidden unit (bias row dropped, since
# the bias input does not depend on the hidden layer).
derror_dh_act <- c(derror_dout) * old_second_layer_weights[-1, , drop = FALSE]

# dh_act/dh_net = sig(a) * (1 - sig(a))
dh_act_dh_net <- hidden_layer_activated * (1 - hidden_layer_activated)

# dh_net/dw1 = the input feeding each weight (bias input included).
dh_net_dw1 <- input_w_bias

# Outer product gives one gradient entry per (input, hidden unit) pair,
# matching the (inputs + 1) x hidden layout of first_layer_weights.
derror_dw1 <- dh_net_dw1 %*% t(derror_dh_act * dh_act_dh_net)
first_layer_weights <- first_layer_weights - learning_rate * derror_dw1
# Convert standardized predictions back to product units by undoing the
# outcome scaling (recipe step 4) and centering (recipe step 3).
unscale_product <- function(scaled) {
  scaled * tt.cv$recipe$steps[[4]]$sds + tt.cv$recipe$steps[[3]]$means
}

# Baseline: put the initial (untrained) weights into the fitted model and
# report the test-set mean squared error.
tt.cv$finalModel$weights <- tt.cv$finalModel$startweights
prediction_w_start_w <- unscale_product(predict(tt.cv, tt.test))
mse(prediction_w_start_w, tt.test$product)

# After one manual update: swap in the adjusted weight matrices and report
# the test-set mean squared error again for comparison.
tt.cv$finalModel$weights[[1]][[2]] <- as.matrix(second_layer_weights)
tt.cv$finalModel$weights[[1]][[1]] <- as.matrix(first_layer_weights)
prediction_w_updated_w <- unscale_product(predict(tt.cv, tt.test))
mse(prediction_w_updated_w, tt.test$product)

# w1 = cbind(c(1, 1, 1), tt.cv$finalModel$startweights[[1]][1][[1]])
# w2 = rbind(1, tt.cv$finalModel$startweights[[1]][2][[1]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment