Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active November 5, 2022 19:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/de552bc32575bfbf50362081ae4fda99 to your computer and use it in GitHub Desktop.
Save bayesball/de552bc32575bfbf50362081ae4fda99 to your computer and use it in GitHub Desktop.
R work for multinomial post -- main file is multinomial_setup.R
estimate_batting <- function(retro_final_PA_1990_2020d,
season,
s_woba = 0.5){
require(dplyr)
require(LearnBayes)
retro_final_PA_1990_2020d %>%
filter(YEAR == season) -> retroseason
retroseason %>%
group_by(BAT_ID) %>%
summarize(PA = n(),
wOBA = mean(EVENT_WOBA)) -> S
d <- cbind(S$wOBA, s_woba ^ 2 / S$PA)
fit <- laplace(normnormexch, c(0, 0), d)
mu <- fit$mode[1]
tau <- exp(fit$mode[2])
S$estimate <- (d[, 1] / d[, 2] + mu / tau ^ 2) /
(1 / d[, 2] + 1 / tau ^ 2)
S %>%
mutate(Season = season,
Batter = estimate) %>%
select(Season, BAT_ID, PA, wOBA, Batter)
}
library(readr)
library(dplyr)
source("estimate_batting.R")
source("pitcher_fit.R")
source("plot_woba.R")
source("plot_many_woba.R")
# read in main dataset
retro_final_PA_1990_2020d <-
read_csv("../data/retro_final_PA_1990-2020d.csv")
# compute wOBA batter estimates for all seasons
Estimates <- NULL
for(Season in 1990:2020){
S <- estimate_batting(retro_final_PA_1990_2020d,
season = Season,
s_woba = 0.5)
Estimates <- rbind(Estimates, S)
}
# find number of batters faced and mean wOBA for
# all pitchers
retro_final_PA_1990_2020d %>%
group_by(PIT_ID) %>%
summarize(BFP = n(),
WOBA = mean(EVENT_WOBA)) -> S
# only consider pitchers who faced at least
# 8000 hitters
inner_join(retro_final_PA_1990_2020d,
S, by = "PIT_ID") %>%
filter(BFP >= 8000) -> retro_8000
# data frame contains id and name for these "8000"
# pitchers
pitchers_8000 <- select(retro_8000,
PIT_ID, PIT_NAME) %>%
unique()
# illustrate calculations for single pitcher
out <- pitcher_fit(pitchers_8000[1, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)
plot_woba(out$r_woba)
# several pitchers
out1 <- pitcher_fit(pitchers_8000[37, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)
out2 <- pitcher_fit(pitchers_8000[15, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)
out3 <- pitcher_fit(pitchers_8000[18, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)
out4 <- pitcher_fit(pitchers_8000[27, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)
out5 <- pitcher_fit(pitchers_8000[45, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)
out6 <- pitcher_fit(pitchers_8000[58, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)
plot_many_woba(rbind(out1$r_woba,
out2$r_woba,
out3$r_woba,
out4$r_woba,
out5$r_woba,
out6$r_woba))
# collect slopes for all pitchers
one_fit <- function(j){
pitcher_fit(pitchers_8000[j, ],
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3)$slope
}
# add slopes to data frame of pitchers
slopes <- sapply(1:126, one_fit)
pitchers_8000$slope <- slopes
ggplot(pitchers_8000,
aes(slope)) +
geom_histogram(fill = "tan",
color = "blue",
bins = 15) +
ggtitle("Histogram of Slopes for 126 Pitchers") +
theme(text=element_text(size=18)) +
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
pitcher_fit <- function(pit_info,
retro_final_PA_1990_2020d,
Estimates,
batter_value = 0.3){
require(dplyr)
require(nnet)
require(LearnBayes)
require(ggplot2)
# get retrosheet data for the one pitcher
pit_id <- pit_info$PIT_ID
retro_data <- filter(retro_final_PA_1990_2020d,
PIT_ID == pit_id,
PA_IND == "TRUE")
# merge the batter wOBA estimates
inner_join(retro_data,
Estimates,
by = c("YEAR" = "Season",
"BAT_ID" = "BAT_ID")) ->
retro_data
# define the batting event variable
retro_data %>%
filter(EVENT_CODE %in% c("HP", "other", "W")) %>%
mutate(Event = ifelse(EVENT_CODE == "HP", "HBP",
ifelse(EVENT_CODE == "W", "uBB",
ifelse(HIT_VAL == 1, "1B",
ifelse(HIT_VAL == 2, "2B",
ifelse(HIT_VAL == 3, "3B",
ifelse(HIT_VAL == 4, "HR",
"Out"))))))) ->
retro_data
# convert event variable to a factor and
# make "Out" the reference category
retro_data$Event <- as.factor(retro_data$Event)
retro_data$Event2 <- relevel(retro_data$Event,
ref = "Out")
# multinomial model fit
fit <- nnet::multinom(Event2 ~ BATTER_SEQ_NUM + Batter,
data = retro_data)
# extract parameter estimates and associated
# var-cov matrix
alpha <- c(t(coef(fit)))
v <- vcov(fit)
# simulate 1000 draws from approximate posterior
r_alpha <- rmnorm(1000, alpha, v)
# function to convert par estimates to probabilities
convert_to_probs <- function(alpha){
c(1, exp(alpha)) / (1 + sum(exp(alpha)))
}
# for each value of BATTER_SEQ_NUM (1-30) and fixed value
# of batter woba, obtain simulated draws of
# expected wOBA
r_woba <- NULL
for(b in 1:30){
pred <- c(1, b, batter_value)
r_lin_pred <- cbind(r_alpha[, 1:3] %*% pred,
r_alpha[, 4:6] %*% pred,
r_alpha[, 7:9] %*% pred,
r_alpha[, 10:12] %*% pred,
r_alpha[, 13:15] %*% pred,
r_alpha[, 16:18] %*% pred)
probs <- t(apply(r_lin_pred, 1, convert_to_probs))
woba_weights <- c(0, 0.89, 1.27, 1.62, 0.72,
2.10, 0.69)
r_woba <- rbind(r_woba,
data.frame(BATTER_SEQ_NUM = b,
wOBA = probs %*% woba_weights))
}
r_woba %>%
mutate(PIT_NAME = pit_info$PIT_NAME) -> r_woba
# summarize trend of (BATTER_SEQ_NUM, E(wOBA))
slope <- coef(lm(wOBA ~ BATTER_SEQ_NUM,
data = r_woba))[2]
list(slope = slope, r_woba = r_woba)
}
plot_many_woba <- function(r_woba){
require(ggplot2)
ggplot(r_woba,
aes(BATTER_SEQ_NUM, wOBA)) +
geom_jitter(size = 0.2) +
geom_smooth(method = "gam",
color = "red") +
ggtitle("Expected wOBA Against Batter Number") +
ylab("Expected wOBA") +
theme(text=element_text(size=18)) +
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0)) +
facet_wrap(~ PIT_NAME)
}
plot_woba <- function(r_woba){
require(ggplot2)
ggplot(r_woba,
aes(BATTER_SEQ_NUM, wOBA)) +
geom_jitter(size = 0.2) +
geom_smooth(method = "gam",
color = "red") +
ggtitle(r_woba$PIT_NAME[1]) +
ylab("Expected wOBA") +
theme(text=element_text(size=18)) +
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment