Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active July 18, 2024 14:44
Show Gist options
  • Save bayesball/7401c84b0d8fa1aab409df31842ceb51 to your computer and use it in GitHub Desktop.
Save bayesball/7401c84b0d8fa1aab409df31842ceb51 to your computer and use it in GitHub Desktop.
R function for implementing posterior predictive simulation of individual home run counts
prediction_normal <- function(S, games, f_games){
# data frame S contains batter, BIP, HR for
# first part of season
# games - average number of games already played
# f_games - number of future games played
# need to have the JAGS software installed
# https://mcmc-jags.sourceforge.io/
library(readr)
library(dplyr)
library(ggplot2)
library(runjags)
library(coda)
library(purrr)
library(tidyr)
JAGS_data <- list(N = nrow(S),
HR = S$HR,
BIP = S$BIP)
modelString <-"
model {
## sampling
for (i in 1:N){
HR[i] ~ dbin(p[i], BIP[i])
logit(p[i]) <- alpha[i]
alpha[i] ~ dnorm(theta, invtau2)
}
theta ~ dnorm(0, 0.001)
invtau2 <- 1 / (tau * tau)
tau ~ dt(0, 1, 1) T(0, )
}
"
# run JAGS, collecting 5000 draws from posterior
posterior <- run.jags(modelString,
n.chains = 1,
data = JAGS_data,
monitor = c("theta", "alpha", "tau"),
adapt = 1000,
burnin = 2000,
sample = 5000)
# obtain data frame of simulated draws
posterior %>% as.mcmc() %>%
as.data.frame() -> post2
# implements one posterior predictive simulation
one_sim <- function(j, BIP){
invlogit <- function(y){
exp(y) / (1 + exp(y))
}
alpha <- post2[j, 2:(JAGS_data$N - 1)]
p_sim <- unlist(invlogit(alpha))
data.frame(Sim = j,
batter = S$batter,
HR = rbinom(JAGS_data$N, size = BIP,
prob = p_sim))
}
# figure out future BIP
S |>
mutate(fBIP = round(BIP * f_games / games)) -> S
# simulate 500 from the posterior predictive distribution
HR_future <- map(sample(1:5000, 500), one_sim,
S$fBIP) |>
list_rbind()
# add known HR counts
inner_join(HR_future, S, by = "batter") |>
mutate(HR_final = HR.x + HR.y) -> HR_future
# interested in (1) max HR, (2) # >= 40, (3) # >= 30,
# (4) # >= 20
HR_future |>
group_by(Sim) |>
summarize(HR_Leading = max(HR_final),
HR_Count_GE_20 = sum(HR_final >= 20),
HR_Count_GE_30 = sum(HR_final >= 30),
HR_Count_GE_40 = sum(HR_final >= 40) ) -> S_future
# pivot longer
S_future |>
pivot_longer(
cols = starts_with("HR"),
names_to = "Type",
values_to = "Measure"
) -> S_graph
# interval estimates
S_graph |>
group_by(Type) |>
summarize(LO = quantile(Measure, 0.05),
HI = quantile(Measure, 0.95),
Prob = mean(Measure >= LO & Measure <= HI)) ->
interval_estimates
# display of prediction distributions
myplot <- ggplot(S_graph, aes(Measure)) +
geom_density(color = "red") +
facet_wrap(~ Type, scales = "free") +
labs(title = "Predictions of Final Season Totals") +
theme(text=element_text(size=18)) +
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
# output the interval estimates and the plot
list(interval_estimates = interval_estimates,
plot = myplot)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment