Last active
April 14, 2020 03:49
-
-
Save bayesball/bd5dd40e11b7e0461cdd93d8f91d5425 to your computer and use it in GitHub Desktop.
Prediction of 2019 HR counts using random effects model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################ | |
## NEW WORK - August 2, 2019 | |
################################################ | |
######################################### | |
# data for 1631 games through August 1 | |
######################################### | |
library(tidyverse) | |
library(CalledStrike) | |
sc2019 <- read_csv("~/Dropbox/2016 WORK/BLOG Baseball R/OTHER/StatcastData/statcast2019.csv") | |
# collect the number of home runs for each game | |
sc2019 %>% | |
group_by(game_pk) %>% | |
summarize(HR = sum(events == "home_run", | |
na.rm = TRUE)) -> S | |
## use poissonness plot to show lack of fit of P model | |
## 1631 games; 2430 - 1631 = 799 games to go | |
n_games <- 162 * 30 / 2 - nrow(S) | |
# bin the data and computing the vertical variable | |
# for the Poissoness Plot | |
S %>% | |
group_by(HR) %>% | |
summarize(N = n()) %>% | |
mutate(p = N / sum(N), | |
PPy = log(p) + log(factorial(HR))) -> SU | |
# Poissoness Plot | |
ggplot(SU, aes(HR, PPy)) + | |
geom_point(color = 'red', size = 3) + | |
geom_smooth(method = "lm", se = FALSE) + | |
increasefont() + | |
ggtitle("Poissoness Plot") + | |
centertitle() | |
## fit random effects model by MCMC Stan | |
library(brms) | |
fit <- brm(HR ~ (1 | game_pk), | |
data = S, | |
family = poisson) | |
# collect the MCMC draws | |
posterior_samples(fit) -> B | |
# B is matrix of 4000 observations and 1503 variables | |
# here's the simulated predictions | |
replicate(1000, | |
B %>% | |
sample_n(size = n_games) %>% | |
mutate(LAM = exp(rnorm(n_games, b_Intercept, | |
sd_game_pk__Intercept)), | |
Y = rpois(n_games, LAM)) %>% | |
summarize(HR = sum(Y) + sum(S$HR)) %>% | |
pull(HR)) -> predict_HR | |
# graph the 2019 predicted HR counts | |
ggplot(data.frame(HR = predict_HR + sum(S$HR)), | |
aes(HR)) + | |
geom_histogram(bins = 20, | |
color = "white", | |
fill = "orange") + | |
increasefont() + | |
ggtitle("2019 Home Run Predictions") + | |
centertitle() | |
######################################### |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment