Created
April 12, 2023 12:51
-
-
Save bayesball/b58bc28838bbc3c459a34e6c92864b53 to your computer and use it in GitHub Desktop.
R code to fit several models to study temperature effect on home run hitting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
library(brms) | |
# read in data from Github site | |
all_data <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/ten_seasons_retro_hr.csv") | |
# remove all parks that are either dome or retractable | |
parks <- unique(all_data$PARK_ID) | |
domed_teams <- parks[c(2, 12, 15, 16, 20, 25, 28, | |
29, 30, 32, 33, 35, 36, | |
38, 39, 40, 41, 42, 43)] | |
all_data2 <- filter(all_data, | |
(PARK_ID %in% domed_teams) == FALSE) | |
# create Game and Season variables | |
all_data2 %>% | |
mutate(Game = as.numeric(as.factor(GAME_ID)), | |
Season = substr(GAME_ID, 4, 7)) -> | |
all_data2 | |
# Bayesian fitting - Poisson errors | |
bfit3 <- brm(HR ~ TEMP_PARK_CT + PARK_ID + Season, | |
family = "poisson", | |
data = all_data2) | |
# extract simulated draws from posterior | |
B3 <- as_draws_df(bfit3) | |
# mean and sd of temp effect | |
round(c(mean(B3$b_TEMP_PARK_CT), sd(B3$b_TEMP_PARK_CT)), 5) | |
# posterior predictive check | |
pp_check(bfit3, type = "stat_2d") | |
# negative binomial errors | |
bfit2 <- brm(HR ~ TEMP_PARK_CT + PARK_ID + Season, | |
family = "negbinomial", | |
data = all_data2) | |
B2 <- as_draws_df(bfit2) | |
# posterior density of temp effect | |
hist(B2$b_TEMP_PARK_CT) | |
# posterior mean and sd | |
round(c(mean(B2$b_TEMP_PARK_CT), sd(B2$b_TEMP_PARK_CT)), 5) | |
# posterior predictive check | |
pp_check(bfit2, type = "stat_2d") | |
# predictions from model | |
# predictions for 2022 season | |
newdata <- filter(all_data2, Season == 2022) | |
out <- predict(bfit2, newdata = newdata, | |
summary = FALSE) | |
# get home run sums for each season | |
pred_sum <- apply(out, 1, sum) | |
mean(pred_sum) | |
hist(pred_sum) | |
# change 2022 seasons temps by one degree | |
newdata2 <- filter(all_data2, Season == 2022) | |
N <- nrow(newdata2) | |
newdata2$TEMP_PARK_CT <- newdata2$TEMP_PARK_CT + 1 | |
out2 <- predict(bfit2, newdata = newdata2, | |
summary = FALSE) | |
pred_sum2 <- apply(out2, 1, sum) | |
mean(pred_sum2) | |
hist(pred_sum2, main = "Predictive Distribution") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment