Last active
May 30, 2021 18:57
-
-
Save RobWiederstein/4b47ffac86d170413c750a43c16ea896 to your computer and use it in GitHub Desktop.
first tutorial from tidymodels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################# | |
## tidymodels ## | |
## 1 build a model ## | |
## url: https://www.tidymodels.org/start/models/ ## | |
################################################################# | |
# 1.0 INTRODUCTION ---- | |
## 1.1 Parsnip pkg + the rest of tidymodels ---- | |
library(tidymodels) | |
## 1.2 Helper packages ---- | |
library(readr) # for importing data | |
library(broom.mixed) # for converting bayesian models to tidy tibbles | |
library(dotwhisker) # for visualizing regression results | |
# 2.0 SEA URCHINS DATA ---- | |
## 2.1 About ---- | |
# https://link.springer.com/article/10.1007/BF00349318 | |
## 2.2 Read-in ---- | |
urchins <- | |
# Data were assembled for a tutorial | |
# at https://www.flutterbys.com.au/stats/tut/tut7.5a.html | |
read_csv("https://tidymodels.org/start/models/urchins.csv") %>% | |
# Change the names to be a little more verbose | |
setNames(c("food_regime", "initial_volume", "width")) %>% | |
# Factors are very helpful for modeling, so we convert one column | |
mutate(food_regime = factor(food_regime, levels = c("Initial", "Low", "High"))) | |
## 2.3 Plot ---- | |
ggplot(urchins, | |
aes(x = initial_volume, | |
y = width, | |
group = food_regime, | |
col = food_regime)) + | |
geom_point() + | |
geom_smooth(method = lm, se = FALSE) + | |
scale_color_viridis_d(option = "plasma", end = .7) | |
# 3.0 BUILD AND FIT A MODEL ---- | |
#A standard two-way analysis of variance (ANOVA) model makes sense for this | |
#dataset because it contains a continuous and categorical predictor | |
## 3.1 Designate model ---- | |
lm_mod <- | |
linear_reg() %>% | |
set_engine("lm") | |
## 3.2 Train/fit/estimate model ---- | |
lm_fit <- | |
lm_mod %>% | |
fit(width ~ initial_volume * food_regime, data = urchins) | |
## 3.3 print tidy ---- | |
tidy(lm_fit) | |
## 3.4 plot results ---- | |
tidy(lm_fit) %>% | |
dwplot(dot_args = list(size = 2, color = "black"), | |
whisker_args = list(color = "black"), | |
vline = geom_vline(xintercept = 0, colour = "grey50", linetype = 2)) | |
# 4.0 USE A MODEL TO PREDICT ---- | |
## 4.1 Create new points ---- | |
new_points <- expand.grid(initial_volume = 20, | |
food_regime = c("Initial", "Low", "High")) | |
## 4.2 Fit model to new data points ---- | |
mean_pred <- predict(lm_fit, new_data = new_points) | |
## 4.3 Create confidence intervals for predictions ---- | |
conf_int_pred <- predict(lm_fit, | |
new_data = new_points, | |
type = "conf_int") | |
## 4.4 New data: new points + estimates + conf int ---- | |
plot_data <- | |
new_points %>% | |
bind_cols(mean_pred) %>% | |
bind_cols(conf_int_pred) | |
## 4.5 And then plot . . . ---- | |
ggplot(plot_data, aes(x = food_regime)) + | |
geom_point(aes(y = .pred)) + | |
geom_errorbar(aes(ymin = .pred_lower, | |
ymax = .pred_upper), | |
width = .2) + | |
labs(y = "urchin size") | |
# 5.0 MODEL WITH A DIFFERENT ENGINE ---- | |
## 5.1 Set the prior distribution ---- | |
#couldn't get 'rstanarm' installed | |
prior_dist <- rstanarm::student_t(df = 1) | |
set.seed(123) | |
## 5.2 Make the parsnip model ---- | |
bayes_mod <- | |
linear_reg() %>% | |
set_engine("stan", | |
prior_intercept = prior_dist, | |
prior = prior_dist) | |
## 5.3 Train the model ---- | |
bayes_fit <- | |
bayes_mod %>% | |
fit(width ~ initial_volume * food_regime, data = urchins) | |
## 5.4 Print the model ---- | |
print(bayes_fit, digits = 5) | |
# 6.0 WHY DOES IT WORK THAT WAY? ---- | |
#the modeling code uses the pipe to pass around the model object: | |
bayes_mod %>% | |
fit(width ~ initial_volume * food_regime, data = urchins) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment