Skip to content

Instantly share code, notes, and snippets.

@ianjohns
Created September 7, 2019 20:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ianjohns/9abd4980cb8136a8bc4e651e45b56864 to your computer and use it in GitHub Desktop.
Save ianjohns/9abd4980cb8136a8bc4e651e45b56864 to your computer and use it in GitHub Desktop.
R neuralnet package
#cars_19 data set
#neural network with 2 hidden layers (7 neurons and 3 neurons)
#raw data
#https://www.fueleconomy.gov/feg/epadata/19data.zip
library(neuralnet)
library(caret)
#load("~/R_Cars_19/Data/cars_19.Rdata")
# Build the numeric design matrix `m` used by every model fit below.
# Expects a data frame `cars_19` to already be in the workspace (see the
# commented-out load() call above).

# Label for this model.
title <- "Neural Network"

# Min-max scale the columns selected by position to the [0, 1] range.
# NOTE(review): positions 1:3, 5 and 8 are presumably the numeric columns of
# cars_19, with fuel_economy_combined among them -- confirm against the data.
# NOTE(review): the min/max are computed on the full data set, before any
# train/test split is drawn, so the test rows influence the scaling.
num_cols <- c(1:3, 5, 8)
maxs <- apply(cars_19[, num_cols], 2, max)
mins <- apply(cars_19[, num_cols], 2, min)
scaled <- as.data.frame(
  scale(cars_19[, num_cols], center = mins, scale = maxs - mins)
)

# Recombine the scaled columns with the remaining, unscaled columns.
tmp <- data.frame(scaled, cars_19[, c(4, 6, 7, 9:12)])

# Formula: the response against every other column of cars_19.
n <- names(cars_19)
f <- as.formula(paste(
  "fuel_economy_combined ~",
  paste(n[!n %in% "fuel_economy_combined"], collapse = " + ")
))

# Expand the predictors into a numeric model matrix.
m <- model.matrix(f, data = tmp)

# Append the scaled response as the last column, looked up and labelled by
# name rather than by the positions tmp[, 1] / column 28, so the result stays
# correct if the number of model-matrix columns changes. The data.frame()
# round-trip is kept on purpose: it rewrites the model-matrix column names
# into syntactic names, which the formula built from colnames(m) relies on.
m <- as.matrix(
  data.frame(m, fuel_economy_combined = tmp[["fuel_economy_combined"]])
)
# Fit one network on a single random 75/25 train/test split and report
# hold-out accuracy on the original (un-scaled) fuel economy scale.
set.seed(123)
indices <- sample(seq_len(nrow(cars_19)), size = 0.75 * nrow(cars_19))
train <- m[indices, ]
test <- m[-indices, ]

# Predictor names: every column of m except the first (the intercept column
# that model.matrix() produces) and the response itself.
n <- colnames(m)[-1]
f <- as.formula(paste(
  "fuel_economy_combined ~",
  paste(n[!n %in% "fuel_economy_combined"], collapse = " + ")
))

# Two hidden layers (7 and 3 neurons) with a linear output unit.
m1_nn <- neuralnet(
  f,
  data = train,
  hidden = c(7, 3),
  linear.output = TRUE
)
pred_nn <- predict(m1_nn, test)

# Undo the min-max scaling so the metrics are in the response's own units.
y_min <- min(cars_19$fuel_economy_combined)
y_range <- max(cars_19$fuel_economy_combined) - y_min
yhat <- pred_nn * y_range + y_min
y <- test[, "fuel_economy_combined"] * y_range + y_min

# RMSE, R-squared and MAE on the hold-out rows.
postResample(yhat, y)
##################################
# Repeated random hold-out validation: 20 independent 75/25 splits.
# (Originally labelled "20 fold cv"; the splits are drawn independently with
# sample(), so they are not disjoint folds and test sets may overlap.)
set.seed(123)
n_reps <- 20
n_obs <- nrow(cars_19)

# Loop-invariant constants for undoing the min-max scaling of the response.
y_min <- min(cars_19$fuel_economy_combined)
y_range <- max(cars_19$fuel_economy_combined) - y_min

# Collect one metrics vector per repetition and bind once after the loop,
# instead of growing a matrix with rbind() on every iteration.
stats_list <- vector("list", n_reps)
for (i in seq_len(n_reps)) {
  indices <- sample(seq_len(n_obs), size = 0.75 * n_obs)
  train_tmp <- m[indices, ]
  test_tmp <- m[-indices, ]
  nn_tmp <- neuralnet(
    f,
    data = train_tmp,
    hidden = c(7, 3),
    linear.output = TRUE
  )
  pred_nn_tmp <- predict(nn_tmp, test_tmp)
  yhat <- pred_nn_tmp * y_range + y_min
  y <- test_tmp[, "fuel_economy_combined"] * y_range + y_min
  stats_tmp <- postResample(yhat, y)
  stats_list[[i]] <- stats_tmp
  cat(i, "\n") # progress indicator
}
stats <- do.call(rbind, stats_list)

# Average the squared per-split RMSE (i.e. the MSE) and take the root
# afterwards; the trailing numbers are the results recorded by the author.
mean(stats[, 1]^2) # avg mse 4.261991
mean(stats[, 1]^2)^0.5 # avg rmse 2.064459
# Ignore the RMSE column here: it is the plain mean of the per-split RMSEs,
# not the root of the mean MSE computed above.
colMeans(stats)
# RMSE Rsquared MAE
# xxx 0.880502 1.466458

# Plot the network fitted in the last repetition.
plot(nn_tmp, rep = "best")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment