# andreyuhai/times_table.R

## times_table.R
library(caret)
library(ModelMetrics)
library(recipes)
library(neuralnet)
library(sigmoid)

# Build the 10 x 10 times table: one row per (multiplier, multiplicand) pair
# with both factors running over 1..10, plus their product as a third column.
tt <- data.frame(
  multiplier = rep(1:10, times = 10),
  multiplicand = rep(1:10, each = 10)
)
tt$product <- with(tt, multiplier * multiplicand)

# Train/test split
# Seed the RNG so the partition is repeatable, then request a single
# partition with p = 0.7 on the `product` column. The rows picked out by
# `indexes` become the training set; every other row goes to the test set.
set.seed(1234)
indexes <- createDataPartition(
  tt$product,
  times = 1,
  p = 0.7,
  list = FALSE
)
tt.train <- tt[indexes, ]
tt.test <- tt[-indexes, ]

# Pre-process

# preProc <- preProcess(tt, method = c('center', 'scale'))
# tt.preProcessed <- predict(preProc, tt)
# tt.preProcessed.train <- tt.preProcessed[indexes,]
# tt.preProcessed.test <- tt.preProcessed[-indexes,]

# Pre-processing recipe declared against the training rows.
# Steps 1-2 centre and scale the predictors; steps 3-4 centre and scale the
# outcome. The order matters: later code reads the outcome statistics back by
# position through `steps[[3]]` (means) and `steps[[4]]` (sds).
rec_reg <- recipe(product ~ ., data = tt.train)
rec_reg <- step_center(rec_reg, all_predictors())
rec_reg <- step_scale(rec_reg, all_predictors())
rec_reg <- step_center(rec_reg, all_outcomes())
rec_reg <- step_scale(rec_reg, all_outcomes())

# Train
# Resampling set-up: 10-fold cross-validation repeated 3 times, keeping the
# hold-out predictions.
#
# The recipe centres and scales the outcome, so the model is fitted to -- and
# predicts -- standardised `product` values (the script maps them back by
# hand afterwards). `predictionBounds` clamps predictions in that same
# standardised scale, so the raw limits 1 and 100 are converted here. Passing
# c(1, 100) unchanged would clamp every standardised prediction to be >= 1.
# NOTE(review): the conversion uses the statistics of the full training set;
# recipes re-estimated inside individual resamples presumably use slightly
# different values, so the bounds are approximate during resampling.
outcome.bounds <- (c(1, 100) - mean(tt.train$product)) / sd(tt.train$product)
train.control <- trainControl(method = "repeatedcv",
                              number = 10,
                              repeats = 3,
                              savePredictions = TRUE,
                              predictionBounds = outcome.bounds)

# Single tuning candidate: 8 units in the first hidden layer, 0 in the
# second and third.
tune.grid <- expand.grid(layer1 = 8, layer2 = 0, layer3 = 0)

# Fit the network
# Fix the RNG seed so the fit is reproducible.
set.seed(12)
tt.cv <- train(
  rec_reg,
  data = tt.train,
  method = "neuralnet",
  tuneGrid = tune.grid,
  trControl = train.control,
  # NOTE(review): the remaining arguments are presumably forwarded to the
  # underlying neuralnet fit -- confirm against the caret model definition.
  algorithm = "backprop",
  learningrate = 0.005,
  lifesign = "minimal"
)

# Test-set predictions mapped back to the original `product` scale by
# undoing the outcome scaling (recipe step 4) and centring (recipe step 3).
predict(tt.cv, tt.test) * tt.cv$recipe$steps[[4]]$sds +
  tt.cv$recipe$steps[[3]]$means

# Manual forward pass for a single example, starting from the network's
# start weights, as the basis for a hand-computed gradient step.

# Start weights of the first repetition: element 1 feeds the hidden layer,
# element 2 feeds the output.
first_layer_weights <- as.data.frame(tt.cv$finalModel$startweights[[1]][1])
second_layer_weights <- as.data.frame(tt.cv$finalModel$startweights[[1]][[2]])

learning_rate <- 0.005

# First row of the data stored on the fitted model: columns 1-2 are used as
# the inputs and column 3 as the target.
input <- tt.cv$finalModel$data[1, 1:2]
output <- tt.cv$finalModel$data[1, 3]

# Prepend a constant 1 so the first weight row acts as the bias.
input_w_bias <- matrix(c(1, as.matrix(input)), 3, 1)

# Hidden layer: weighted sum followed by the sigmoid activation.
hidden_layer_net <- t(first_layer_weights) %*% as.matrix(input_w_bias)
hidden_layer_activated <- sigmoid(hidden_layer_net)

# Add the bias unit in front of the hidden activations.
hidden_layer_activated_w_bias <- rbind(1, hidden_layer_activated)

# Output unit: plain weighted sum, no activation applied.
output_predicted <- t(second_layer_weights) %*% hidden_layer_activated_w_bias

# Squared error of this single example. It is not divided by the number of
# test rows: only one (training) example is involved, and the gradient used
# for the update, 2 * (output_predicted - output), is the derivative of
# exactly this expression.
error <- (output - output_predicted)^2


# Output-layer update: derror/dw2 = derror/dout * dout/dw2

# Derivative of (output - output_predicted)^2 with respect to the prediction.
derror_dout <- 2 * (output_predicted - output)

# dout/dw2 is the bias-augmented hidden activation feeding each weight.
derror_dweight <- as.vector(derror_dout) * hidden_layer_activated_w_bias

# Keep a copy of the pre-update weights, then take one gradient-descent step.
old_second_layer_weights <- second_layer_weights
second_layer_weights <- second_layer_weights - (learning_rate * derror_dweight)

# Hidden-layer update: propagate the output error back through the second
# layer to the first-layer weights.
#
# derror/dw1 = derror/dh_act * dh_act/dh_net * dh_net/dw1

# derror/dh_act = derror/dout * dout/dh_act. The output is a plain weighted
# sum of the hidden activations, so dout/dh_act is the hidden -> output
# weight of each hidden unit. The pre-update weights are used, and the bias
# row (row 1) is dropped because no hidden unit sits behind it.
derror_dh_act <- as.vector(derror_dout) *
  as.matrix(old_second_layer_weights)[-1, , drop = FALSE]

# dh_act/dh_net = sig(a) * (1 - sig(a))
dh_act_dh_net <- hidden_layer_activated * (1 - hidden_layer_activated)

# dh_net/dw1 is the input feeding each weight (bias input first), not the
# weight itself.
dh_net_dw1 <- input_w_bias

# Outer product gives one gradient entry per weight:
# row i = input i (bias first), column j = hidden unit j.
derror_dw1 <- dh_net_dw1 %*% t(derror_dh_act * dh_act_dh_net)

first_layer_weights <- first_layer_weights - (learning_rate * derror_dw1)

# Compare the test-set mean squared error before and after the manual update.

# Baseline: put the start weights in place of the fitted weights, predict,
# map the predictions back to the original `product` scale and print the MSE.
tt.cv$finalModel$weights <- tt.cv$finalModel$startweights
prediction_w_start_w <- predict(tt.cv, tt.test) * tt.cv$recipe$steps[[4]]$sds +
  tt.cv$recipe$steps[[3]]$means
mse(prediction_w_start_w, tt.test$product)

# Updated: swap in the manually adjusted weights of both layers and repeat.
tt.cv$finalModel$weights[[1]][[2]] <- as.matrix(second_layer_weights)
tt.cv$finalModel$weights[[1]][[1]] <- as.matrix(first_layer_weights)
prediction_w_updated_w <- predict(tt.cv, tt.test) * tt.cv$recipe$steps[[4]]$sds +
  tt.cv$recipe$steps[[3]]$means
mse(prediction_w_updated_w, tt.test$product)


# w1 = cbind(c(1, 1, 1), tt.cv$finalModel$startweights[[1]][1][[1]])
# w2 = rbind(1, tt.cv$finalModel$startweights[[1]][2][[1]])
	library(caret)
	library(ModelMetrics)
	library(recipes)
	library(neuralnet)
	library(sigmoid)

	# Build the 10 x 10 times table: one row per (multiplier, multiplicand) pair
	# with both factors running over 1..10, plus their product as a third column.
	tt <- data.frame(
	  multiplier = rep(1:10, times = 10),
	  multiplicand = rep(1:10, each = 10)
	)
	tt$product <- with(tt, multiplier * multiplicand)

	# Train/test split
	# Seed the RNG so the partition is repeatable, then request a single
	# partition with p = 0.7 on the `product` column. The rows picked out by
	# `indexes` become the training set; every other row goes to the test set.
	set.seed(1234)
	indexes <- createDataPartition(
	  tt$product,
	  times = 1,
	  p = 0.7,
	  list = FALSE
	)
	tt.train <- tt[indexes, ]
	tt.test <- tt[-indexes, ]

	# Pre-process

	# preProc <- preProcess(tt, method = c('center', 'scale'))
	# tt.preProcessed <- predict(preProc, tt)
	# tt.preProcessed.train <- tt.preProcessed[indexes,]
	# tt.preProcessed.test <- tt.preProcessed[-indexes,]

	# Pre-processing recipe declared against the training rows.
	# Steps 1-2 centre and scale the predictors; steps 3-4 centre and scale the
	# outcome. The order matters: later code reads the outcome statistics back by
	# position through `steps[[3]]` (means) and `steps[[4]]` (sds).
	rec_reg <- recipe(product ~ ., data = tt.train)
	rec_reg <- step_center(rec_reg, all_predictors())
	rec_reg <- step_scale(rec_reg, all_predictors())
	rec_reg <- step_center(rec_reg, all_outcomes())
	rec_reg <- step_scale(rec_reg, all_outcomes())

	# Train
	# Resampling set-up: 10-fold cross-validation repeated 3 times, keeping the
	# hold-out predictions.
	#
	# The recipe centres and scales the outcome, so the model is fitted to -- and
	# predicts -- standardised `product` values (the script maps them back by
	# hand afterwards). `predictionBounds` clamps predictions in that same
	# standardised scale, so the raw limits 1 and 100 are converted here. Passing
	# c(1, 100) unchanged would clamp every standardised prediction to be >= 1.
	# NOTE(review): the conversion uses the statistics of the full training set;
	# recipes re-estimated inside individual resamples presumably use slightly
	# different values, so the bounds are approximate during resampling.
	outcome.bounds <- (c(1, 100) - mean(tt.train$product)) / sd(tt.train$product)
	train.control <- trainControl(method = "repeatedcv",
	                              number = 10,
	                              repeats = 3,
	                              savePredictions = TRUE,
	                              predictionBounds = outcome.bounds)

	# Single tuning candidate: 8 units in the first hidden layer, 0 in the
	# second and third.
	tune.grid <- expand.grid(layer1 = 8, layer2 = 0, layer3 = 0)

	# Fit the network
	# Fix the RNG seed so the fit is reproducible.
	set.seed(12)
	tt.cv <- train(
	  rec_reg,
	  data = tt.train,
	  method = "neuralnet",
	  tuneGrid = tune.grid,
	  trControl = train.control,
	  # NOTE(review): the remaining arguments are presumably forwarded to the
	  # underlying neuralnet fit -- confirm against the caret model definition.
	  algorithm = "backprop",
	  learningrate = 0.005,
	  lifesign = "minimal"
	)

	# Test-set predictions mapped back to the original `product` scale by
	# undoing the outcome scaling (recipe step 4) and centring (recipe step 3).
	predict(tt.cv, tt.test) * tt.cv$recipe$steps[[4]]$sds +
	  tt.cv$recipe$steps[[3]]$means

	# Manual forward pass for a single example, starting from the network's
	# start weights, as the basis for a hand-computed gradient step.

	# Start weights of the first repetition: element 1 feeds the hidden layer,
	# element 2 feeds the output.
	first_layer_weights <- as.data.frame(tt.cv$finalModel$startweights[[1]][1])
	second_layer_weights <- as.data.frame(tt.cv$finalModel$startweights[[1]][[2]])

	learning_rate <- 0.005

	# First row of the data stored on the fitted model: columns 1-2 are used as
	# the inputs and column 3 as the target.
	input <- tt.cv$finalModel$data[1, 1:2]
	output <- tt.cv$finalModel$data[1, 3]

	# Prepend a constant 1 so the first weight row acts as the bias.
	input_w_bias <- matrix(c(1, as.matrix(input)), 3, 1)

	# Hidden layer: weighted sum followed by the sigmoid activation.
	hidden_layer_net <- t(first_layer_weights) %*% as.matrix(input_w_bias)
	hidden_layer_activated <- sigmoid(hidden_layer_net)

	# Add the bias unit in front of the hidden activations.
	hidden_layer_activated_w_bias <- rbind(1, hidden_layer_activated)

	# Output unit: plain weighted sum, no activation applied.
	output_predicted <- t(second_layer_weights) %*% hidden_layer_activated_w_bias

	# Squared error of this single example. It is not divided by the number of
	# test rows: only one (training) example is involved, and the gradient used
	# for the update, 2 * (output_predicted - output), is the derivative of
	# exactly this expression.
	error <- (output - output_predicted)^2


	# Output-layer update: derror/dw2 = derror/dout * dout/dw2

	# Derivative of (output - output_predicted)^2 with respect to the prediction.
	derror_dout <- 2 * (output_predicted - output)

	# dout/dw2 is the bias-augmented hidden activation feeding each weight.
	derror_dweight <- as.vector(derror_dout) * hidden_layer_activated_w_bias

	# Keep a copy of the pre-update weights, then take one gradient-descent step.
	old_second_layer_weights <- second_layer_weights
	second_layer_weights <- second_layer_weights - (learning_rate * derror_dweight)

	# Hidden-layer update: propagate the output error back through the second
	# layer to the first-layer weights.
	#
	# derror/dw1 = derror/dh_act * dh_act/dh_net * dh_net/dw1

	# derror/dh_act = derror/dout * dout/dh_act. The output is a plain weighted
	# sum of the hidden activations, so dout/dh_act is the hidden -> output
	# weight of each hidden unit. The pre-update weights are used, and the bias
	# row (row 1) is dropped because no hidden unit sits behind it.
	derror_dh_act <- as.vector(derror_dout) *
	  as.matrix(old_second_layer_weights)[-1, , drop = FALSE]

	# dh_act/dh_net = sig(a) * (1 - sig(a))
	dh_act_dh_net <- hidden_layer_activated * (1 - hidden_layer_activated)

	# dh_net/dw1 is the input feeding each weight (bias input first), not the
	# weight itself.
	dh_net_dw1 <- input_w_bias

	# Outer product gives one gradient entry per weight:
	# row i = input i (bias first), column j = hidden unit j.
	derror_dw1 <- dh_net_dw1 %*% t(derror_dh_act * dh_act_dh_net)

	first_layer_weights <- first_layer_weights - (learning_rate * derror_dw1)

	# Compare the test-set mean squared error before and after the manual update.

	# Baseline: put the start weights in place of the fitted weights, predict,
	# map the predictions back to the original `product` scale and print the MSE.
	tt.cv$finalModel$weights <- tt.cv$finalModel$startweights
	prediction_w_start_w <- predict(tt.cv, tt.test) * tt.cv$recipe$steps[[4]]$sds +
	  tt.cv$recipe$steps[[3]]$means
	mse(prediction_w_start_w, tt.test$product)

	# Updated: swap in the manually adjusted weights of both layers and repeat.
	tt.cv$finalModel$weights[[1]][[2]] <- as.matrix(second_layer_weights)
	tt.cv$finalModel$weights[[1]][[1]] <- as.matrix(first_layer_weights)
	prediction_w_updated_w <- predict(tt.cv, tt.test) * tt.cv$recipe$steps[[4]]$sds +
	  tt.cv$recipe$steps[[3]]$means
	mse(prediction_w_updated_w, tt.test$product)



	# w1 = cbind(c(1, 1, 1), tt.cv$finalModel$startweights[[1]][1][[1]])
	# w2 = rbind(1, tt.cv$finalModel$startweights[[1]][2][[1]])