Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created March 16, 2022 15:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/5fd5e2f726bb6d7972443c4cf8e5ea3b to your computer and use it in GitHub Desktop.
Save bayesball/5fd5e2f726bb6d7972443c4cf8e5ea3b to your computer and use it in GitHub Desktop.
Fit of a nonnested multilevel model using the lme4 package
# load in packages
library(readr)
library(dplyr)
library(lme4)
library(ggplot2)
library(posterior)
library(gridExtra)
library(Lahman)
# load 2021 data and Fangraphs data
load("~/Dropbox/Google Drive/Retrosheet/pbp.2021.Rdata")
fg_guts <-
read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/fg_guts.csv")
# only consider batting events and collect
# key variables
d2021 %>%
filter(BAT_EVENT_FL == TRUE) %>%
select(PIT_ID, BAT_ID, EVENT_CD) %>%
mutate(Season = 2021) -> d2021a
# compute wOBA weights
d2021a %>%
inner_join(fg_guts, by = "Season") %>%
mutate(WT = wBB * (EVENT_CD %in% 14:15) +
wHBP * (EVENT_CD == 16) +
w1B * (EVENT_CD == 20) +
w2B * (EVENT_CD == 21) +
w3B * (EVENT_CD == 22) +
wHR * (EVENT_CD == 23)) %>%
select(PIT_ID, BAT_ID, EVENT_CD, WT) -> d2021a
# lmer fit
fitB <- lmer(WT ~ 1 + (1 | PIT_ID) + (1 | BAT_ID),
data = d2021a)
# variance components
VarCorr(fitB)
# work with random effects
RE <- ranef(fitB)
# create data frames of random effect estimates
S_batter <- data.frame(Var = row.names(RE$BAT_ID),
mean = RE$BAT_ID[, 1])
S_pitcher <- data.frame(Var = row.names(RE$PIT_ID),
mean = RE$PIT_ID[, 1])
# collect PA and raw wOBA estimates
d2021a %>%
group_by(PIT_ID) %>%
summarize(PA = n(),
Estimate = mean(WT)) ->
pitcher_estimates
d2021a %>%
group_by(BAT_ID) %>%
summarize(PA = n(),
Estimate = mean(WT)) ->
batter_estimates
# merge raw and RE estimate tables
inner_join(batter_estimates, S_batter,
by = c("BAT_ID" = "Var")) ->
batter_estimates
inner_join(pitcher_estimates, S_pitcher,
by = c("PIT_ID" = "Var")) ->
pitcher_estimates
# some graphs -- first a couple of utility functions
centertitle <- function(){
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5,
vjust = 0.8,
angle = 0))
}
increasefont <-function(){
theme(text=element_text(size=18))
}
# scatterplot of raw wOBA estimates and random effects
ggplot(pitcher_estimates,
aes(Estimate, mean, color = PA)) +
geom_point() +
increasefont()
# plot of raw wOBA and random effects against PA
p1 <- ggplot(pitcher_estimates, aes(PA, Estimate)) +
geom_point() +
increasefont() +
centertitle() +
ggtitle("Raw Pitcher Estimates")
p2 <- ggplot(pitcher_estimates, aes(PA, mean)) +
geom_point() +
geom_smooth(color = "blue") +
geom_hline(yintercept = 0, color = "red") +
increasefont() +
centertitle() +
ggtitle("Pitcher Random Effects") +
ylab("Random Effect")
grid.arrange(p1, p2)
# do this for batters
q1 <- ggplot(batter_estimates, aes(PA, Estimate)) +
geom_point() +
increasefont() +
centertitle() +
ggtitle("Raw Batter Estimates")
q2 <- ggplot(batter_estimates, aes(PA, mean)) +
geom_point() +
geom_smooth(color = "blue") +
geom_hline(yintercept = 0, color = "red") +
increasefont() +
centertitle() +
ggtitle("Batter Random Effects") +
ylab("Random Effect")
grid.arrange(q1, q2)
# best pitchers and hitters
# first add names
inner_join(pitcher_estimates,
select(Master, retroID,
nameFirst, nameLast),
by = c("PIT_ID" = "retroID")) %>%
mutate(Name = paste(nameFirst, nameLast)) %>%
select(Name, PA, Estimate, mean) ->
pitcher_estimates
inner_join(batter_estimates,
select(Master, retroID,
nameFirst, nameLast),
by = c("BAT_ID" = "retroID")) %>%
mutate(Name = paste(nameFirst, nameLast)) %>%
select(Name, PA, Estimate, mean) ->
batter_estimates
# create top 10 lists
pitcher_estimates %>%
arrange(mean) %>%
head(10)
batter_estimates %>%
arrange(desc(mean)) %>%
head(10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment