Skip to content

Instantly share code, notes, and snippets.

@seankross
Last active June 27, 2017 17:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seankross/20ea87c0ea4d99965b7e4e541050d1c2 to your computer and use it in GitHub Desktop.
Save seankross/20ea87c0ea4d99965b7e4e541050d1c2 to your computer and use it in GitHub Desktop.
library(dplyr)
library(purrr)
set.seed(2017-06-27)
# Imagine this function gives us measurements from nature
data_generator <- function(n = 500){
data_frame(var1 = rnorm(500)) %>%
mutate(var2 = var1 + rnorm(500)) %>%
mutate(var3 = var1 * var2 + rnorm(500)) %>%
mutate(resp = abs(var1 + var2 + var3)) %>%
mutate(resp = abs(resp)/max(resp)) %>%
mutate(resp = as.numeric(resp > median(resp)))
}
# Collect some data
dataset1 <- data_generator()
# Build a model
model <- glm(resp ~ ., data = dataset1, family = binomial())
model %>%
summary()
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -0.10570 0.10664 -0.991 0.32160
# var1 0.78714 0.15443 5.097 3.45e-07 ***
# var2 0.11033 0.09607 1.148 0.25082
# var3 0.18189 0.06002 3.031 0.00244 **
# Function to calculate RMSE
rmse <- . %>%
mutate(pred = predict(model, newdata = ., type = "response")) %>%
mutate(error = resp - pred) %>%
select(error) %>%
unlist() %>%
`^`(2) %>% mean() %>% sqrt()
# Function to do sampling with replacement
bootstrapped_rmse <- . %>%
sample_frac(replace = TRUE) %>%
rmse()
# 100 bootstrap replicates (do 1000 or 10000)
rmse_ci <- 1:100 %>%
map(~ dataset1) %>%
map_dbl(bootstrapped_rmse)
# Here's your 95% CI
quantile(rmse_ci, c(0.025, 0.975))
# 2.5% 97.5%
# 0.4473360 0.4744052
# Collect more data
dataset2 <- data_generator()
# Inspect RMSE
dataset2 %>%
rmse()
# 0.4580332
# It's in the CI which suggests the model might be good.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment