Created
March 16, 2022 15:38
-
-
Save bayesball/5fd5e2f726bb6d7972443c4cf8e5ea3b to your computer and use it in GitHub Desktop.
Fit of a nonnested multilevel model using the lme4 package
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load in packages | |
library(readr) | |
library(dplyr) | |
library(lme4) | |
library(ggplot2) | |
library(posterior) | |
library(gridExtra) | |
library(Lahman) | |
# load 2021 data and Fangraphs data | |
load("~/Dropbox/Google Drive/Retrosheet/pbp.2021.Rdata") | |
fg_guts <- | |
read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/fg_guts.csv") | |
# only consider batting events and collect | |
# key variables | |
d2021 %>% | |
filter(BAT_EVENT_FL == TRUE) %>% | |
select(PIT_ID, BAT_ID, EVENT_CD) %>% | |
mutate(Season = 2021) -> d2021a | |
# compute wOBA weights | |
d2021a %>% | |
inner_join(fg_guts, by = "Season") %>% | |
mutate(WT = wBB * (EVENT_CD %in% 14:15) + | |
wHBP * (EVENT_CD == 16) + | |
w1B * (EVENT_CD == 20) + | |
w2B * (EVENT_CD == 21) + | |
w3B * (EVENT_CD == 22) + | |
wHR * (EVENT_CD == 23)) %>% | |
select(PIT_ID, BAT_ID, EVENT_CD, WT) -> d2021a | |
# lmer fit | |
fitB <- lmer(WT ~ 1 + (1 | PIT_ID) + (1 | BAT_ID), | |
data = d2021a) | |
# variance components | |
VarCorr(fitB) | |
# work with random effects | |
RE <- ranef(fitB) | |
# create data frames of random effect estimates | |
S_batter <- data.frame(Var = row.names(RE$BAT_ID), | |
mean = RE$BAT_ID[, 1]) | |
S_pitcher <- data.frame(Var = row.names(RE$PIT_ID), | |
mean = RE$PIT_ID[, 1]) | |
# collect PA and raw wOBA estimates | |
d2021a %>% | |
group_by(PIT_ID) %>% | |
summarize(PA = n(), | |
Estimate = mean(WT)) -> | |
pitcher_estimates | |
d2021a %>% | |
group_by(BAT_ID) %>% | |
summarize(PA = n(), | |
Estimate = mean(WT)) -> | |
batter_estimates | |
# merge raw and RE estimate tables | |
inner_join(batter_estimates, S_batter, | |
by = c("BAT_ID" = "Var")) -> | |
batter_estimates | |
inner_join(pitcher_estimates, S_pitcher, | |
by = c("PIT_ID" = "Var")) -> | |
pitcher_estimates | |
# some graphs -- first a couple of utility functions | |
centertitle <- function(){ | |
theme(plot.title = element_text(colour = "blue", | |
size = 18, | |
hjust = 0.5, | |
vjust = 0.8, | |
angle = 0)) | |
} | |
increasefont <-function(){ | |
theme(text=element_text(size=18)) | |
} | |
# scatterplot of raw wOBA estimates and random effects | |
ggplot(pitcher_estimates, | |
aes(Estimate, mean, color = PA)) + | |
geom_point() + | |
increasefont() | |
# plot of raw wOBA and random effects against PA | |
p1 <- ggplot(pitcher_estimates, aes(PA, Estimate)) + | |
geom_point() + | |
increasefont() + | |
centertitle() + | |
ggtitle("Raw Pitcher Estimates") | |
p2 <- ggplot(pitcher_estimates, aes(PA, mean)) + | |
geom_point() + | |
geom_smooth(color = "blue") + | |
geom_hline(yintercept = 0, color = "red") + | |
increasefont() + | |
centertitle() + | |
ggtitle("Pitcher Random Effects") + | |
ylab("Random Effect") | |
grid.arrange(p1, p2) | |
# do this for batters | |
q1 <- ggplot(batter_estimates, aes(PA, Estimate)) + | |
geom_point() + | |
increasefont() + | |
centertitle() + | |
ggtitle("Raw Batter Estimates") | |
q2 <- ggplot(batter_estimates, aes(PA, mean)) + | |
geom_point() + | |
geom_smooth(color = "blue") + | |
geom_hline(yintercept = 0, color = "red") + | |
increasefont() + | |
centertitle() + | |
ggtitle("Batter Random Effects") + | |
ylab("Random Effect") | |
grid.arrange(q1, q2) | |
# best pitchers and hitters | |
# first add names | |
inner_join(pitcher_estimates, | |
select(Master, retroID, | |
nameFirst, nameLast), | |
by = c("PIT_ID" = "retroID")) %>% | |
mutate(Name = paste(nameFirst, nameLast)) %>% | |
select(Name, PA, Estimate, mean) -> | |
pitcher_estimates | |
inner_join(batter_estimates, | |
select(Master, retroID, | |
nameFirst, nameLast), | |
by = c("BAT_ID" = "retroID")) %>% | |
mutate(Name = paste(nameFirst, nameLast)) %>% | |
select(Name, PA, Estimate, mean) -> | |
batter_estimates | |
# create top 10 lists | |
pitcher_estimates %>% | |
arrange(mean) %>% | |
head(10) | |
batter_estimates %>% | |
arrange(desc(mean)) %>% | |
head(10) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment