Last active
November 5, 2022 19:43
-
-
Save bayesball/de552bc32575bfbf50362081ae4fda99 to your computer and use it in GitHub Desktop.
R work for multinomial post -- main file is multinomial_setup.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
estimate_batting <- function(retro_final_PA_1990_2020d, | |
season, | |
s_woba = 0.5){ | |
require(dplyr) | |
require(LearnBayes) | |
retro_final_PA_1990_2020d %>% | |
filter(YEAR == season) -> retroseason | |
retroseason %>% | |
group_by(BAT_ID) %>% | |
summarize(PA = n(), | |
wOBA = mean(EVENT_WOBA)) -> S | |
d <- cbind(S$wOBA, s_woba ^ 2 / S$PA) | |
fit <- laplace(normnormexch, c(0, 0), d) | |
mu <- fit$mode[1] | |
tau <- exp(fit$mode[2]) | |
S$estimate <- (d[, 1] / d[, 2] + mu / tau ^ 2) / | |
(1 / d[, 2] + 1 / tau ^ 2) | |
S %>% | |
mutate(Season = season, | |
Batter = estimate) %>% | |
select(Season, BAT_ID, PA, wOBA, Batter) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
source("estimate_batting.R") | |
source("pitcher_fit.R") | |
source("plot_woba.R") | |
source("plot_many_woba.R") | |
# read in main dataset | |
retro_final_PA_1990_2020d <- | |
read_csv("../data/retro_final_PA_1990-2020d.csv") | |
# compute wOBA batter estimates for all seasons | |
Estimates <- NULL | |
for(Season in 1990:2020){ | |
S <- estimate_batting(retro_final_PA_1990_2020d, | |
season = Season, | |
s_woba = 0.5) | |
Estimates <- rbind(Estimates, S) | |
} | |
# find number of batters faced and mean wOBA for | |
# all pitchers | |
retro_final_PA_1990_2020d %>% | |
group_by(PIT_ID) %>% | |
summarize(BFP = n(), | |
WOBA = mean(EVENT_WOBA)) -> S | |
# only consider pitchers who faced at least | |
# 8000 hitters | |
inner_join(retro_final_PA_1990_2020d, | |
S, by = "PIT_ID") %>% | |
filter(BFP >= 8000) -> retro_8000 | |
# data frame contains id and name for these "8000" | |
# pitchers | |
pitchers_8000 <- select(retro_8000, | |
PIT_ID, PIT_NAME) %>% | |
unique() | |
# illustrate calculations for single pitcher | |
out <- pitcher_fit(pitchers_8000[1, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3) | |
plot_woba(out$r_woba) | |
# several pitchers | |
out1 <- pitcher_fit(pitchers_8000[37, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3) | |
out2 <- pitcher_fit(pitchers_8000[15, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3) | |
out3 <- pitcher_fit(pitchers_8000[18, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3) | |
out4 <- pitcher_fit(pitchers_8000[27, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3) | |
out5 <- pitcher_fit(pitchers_8000[45, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3) | |
out6 <- pitcher_fit(pitchers_8000[58, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3) | |
plot_many_woba(rbind(out1$r_woba, | |
out2$r_woba, | |
out3$r_woba, | |
out4$r_woba, | |
out5$r_woba, | |
out6$r_woba)) | |
# collect slopes for all pitchers | |
one_fit <- function(j){ | |
pitcher_fit(pitchers_8000[j, ], | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3)$slope | |
} | |
# add slopes to data frame of pitchers | |
slopes <- sapply(1:126, one_fit) | |
pitchers_8000$slope <- slopes | |
ggplot(pitchers_8000, | |
aes(slope)) + | |
geom_histogram(fill = "tan", | |
color = "blue", | |
bins = 15) + | |
ggtitle("Histogram of Slopes for 126 Pitchers") + | |
theme(text=element_text(size=18)) + | |
theme(plot.title = element_text(colour = "blue", size = 18, | |
hjust = 0.5, vjust = 0.8, angle = 0)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pitcher_fit <- function(pit_info, | |
retro_final_PA_1990_2020d, | |
Estimates, | |
batter_value = 0.3){ | |
require(dplyr) | |
require(nnet) | |
require(LearnBayes) | |
require(ggplot2) | |
# get retrosheet data for the one pitcher | |
pit_id <- pit_info$PIT_ID | |
retro_data <- filter(retro_final_PA_1990_2020d, | |
PIT_ID == pit_id, | |
PA_IND == "TRUE") | |
# merge the batter wOBA estimates | |
inner_join(retro_data, | |
Estimates, | |
by = c("YEAR" = "Season", | |
"BAT_ID" = "BAT_ID")) -> | |
retro_data | |
# define the batting event variable | |
retro_data %>% | |
filter(EVENT_CODE %in% c("HP", "other", "W")) %>% | |
mutate(Event = ifelse(EVENT_CODE == "HP", "HBP", | |
ifelse(EVENT_CODE == "W", "uBB", | |
ifelse(HIT_VAL == 1, "1B", | |
ifelse(HIT_VAL == 2, "2B", | |
ifelse(HIT_VAL == 3, "3B", | |
ifelse(HIT_VAL == 4, "HR", | |
"Out"))))))) -> | |
retro_data | |
# convert event variable to a factor and | |
# make "Out" the reference category | |
retro_data$Event <- as.factor(retro_data$Event) | |
retro_data$Event2 <- relevel(retro_data$Event, | |
ref = "Out") | |
# multinomial model fit | |
fit <- nnet::multinom(Event2 ~ BATTER_SEQ_NUM + Batter, | |
data = retro_data) | |
# extract parameter estimates and associated | |
# var-cov matrix | |
alpha <- c(t(coef(fit))) | |
v <- vcov(fit) | |
# simulate 1000 draws from approximate posterior | |
r_alpha <- rmnorm(1000, alpha, v) | |
# function to convert par estimates to probabilities | |
convert_to_probs <- function(alpha){ | |
c(1, exp(alpha)) / (1 + sum(exp(alpha))) | |
} | |
# for each value of BATTER_SEQ_NUM (1-30) and fixed value | |
# of batter woba, obtain simulated draws of | |
# expected wOBA | |
r_woba <- NULL | |
for(b in 1:30){ | |
pred <- c(1, b, batter_value) | |
r_lin_pred <- cbind(r_alpha[, 1:3] %*% pred, | |
r_alpha[, 4:6] %*% pred, | |
r_alpha[, 7:9] %*% pred, | |
r_alpha[, 10:12] %*% pred, | |
r_alpha[, 13:15] %*% pred, | |
r_alpha[, 16:18] %*% pred) | |
probs <- t(apply(r_lin_pred, 1, convert_to_probs)) | |
woba_weights <- c(0, 0.89, 1.27, 1.62, 0.72, | |
2.10, 0.69) | |
r_woba <- rbind(r_woba, | |
data.frame(BATTER_SEQ_NUM = b, | |
wOBA = probs %*% woba_weights)) | |
} | |
r_woba %>% | |
mutate(PIT_NAME = pit_info$PIT_NAME) -> r_woba | |
# summarize trend of (BATTER_SEQ_NUM, E(wOBA)) | |
slope <- coef(lm(wOBA ~ BATTER_SEQ_NUM, | |
data = r_woba))[2] | |
list(slope = slope, r_woba = r_woba) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plot_many_woba <- function(r_woba){ | |
require(ggplot2) | |
ggplot(r_woba, | |
aes(BATTER_SEQ_NUM, wOBA)) + | |
geom_jitter(size = 0.2) + | |
geom_smooth(method = "gam", | |
color = "red") + | |
ggtitle("Expected wOBA Against Batter Number") + | |
ylab("Expected wOBA") + | |
theme(text=element_text(size=18)) + | |
theme(plot.title = element_text(colour = "blue", size = 18, | |
hjust = 0.5, vjust = 0.8, angle = 0)) + | |
facet_wrap(~ PIT_NAME) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plot_woba <- function(r_woba){ | |
require(ggplot2) | |
ggplot(r_woba, | |
aes(BATTER_SEQ_NUM, wOBA)) + | |
geom_jitter(size = 0.2) + | |
geom_smooth(method = "gam", | |
color = "red") + | |
ggtitle(r_woba$PIT_NAME[1]) + | |
ylab("Expected wOBA") + | |
theme(text=element_text(size=18)) + | |
theme(plot.title = element_text(colour = "blue", size = 18, | |
hjust = 0.5, vjust = 0.8, angle = 0)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment