Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active April 14, 2019 10:32
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/9db4998b1c8030b1fc42 to your computer and use it in GitHub Desktop.
Save bayesball/9db4998b1c8030b1fc42 to your computer and use it in GitHub Desktop.
R code to estimate platoon ability distribution using random effects model
# code for estimating platoon effects for 2014 season
# one needs Retrosheet play-by-play for this season
# also uses the Master file in the Lahman database to get the player batting sides
# last, one needs a file from fangraphs to compute the weights in the wOBA formula
library(dplyr)
# retrieved wOBA weights from http://www.fangraphs.com/guts.aspx?type=cn
# stored in file fangraphs.csv
fangraphs <- read.csv("~/Dropbox/2014 WORK/situational/fangraphs.csv")
wt <- filter(fangraphs, Season==2014)
# load in 2014 retrosheet data and limit to batting events
# record the different batting events
load("~/OneDriveBusiness/Retrosheet/pbp.2014.Rdata")
filter(pbp.14, BAT_EVENT_FL==TRUE) %>%
mutate(uBB=(EVENT_CD==14), HBP=(EVENT_CD==16),
X1B=(EVENT_CD==20), X2B=(EVENT_CD==21),
X3B=(EVENT_CD==22), HR=(EVENT_CD==23),
SF=(SF_FL==TRUE), AB=(AB_FL==TRUE)) -> pbp.b
# compute value of each PA using fangraphs weights
filter(pbp.b, AB + uBB + SF + HBP > 0) %>%
mutate(Y = wt$wBB * uBB + wt$wHBP * HBP +
wt$w1B * X1B + wt$w2B * X2B + wt$w3B * X3B +
wt$wHR * HR) -> pbp.b
# add in batting side information from Lahman's database
Master <- read.csv("~/OneDriveBusiness/lahman-csv_2015-01-24/Master.csv")
pbp.b <- merge(pbp.b, Master, by.x="BAT_ID", by.y="retroID")
siteffects <- function(pbp, side, N=150){
require(LearnBayes)
require(dplyr)
require(tidyr)
require(ggplot2)
# define the opposite side variable
pbp.R <- filter(pbp, bats==side)
opp.side <- ifelse(side=="R", "L", "R")
pbp.R$Opposite <- with(pbp.R, ifelse(PIT_HAND_CD==opp.side, 1, 0))
# find the number of PA against each side; only keep the
# data for players who had at least N appearances against each side
summarize(group_by(pbp.R, BAT_ID, Opposite), N=length(Y)) %>%
spread(Opposite, N) -> S
names(S)[2:3] <- c("N.same", "N.opposite")
merge(pbp.R, S, by="BAT_ID") %>%
filter(N.same >= N, N.opposite >= N) -> pbp.Ra
# regression fit for a single batter, return the estimate of slope
# and the corresponding standard error
one.fit <- function(batter){
fit <- lm(Y ~ Opposite, data=filter(pbp.Ra, BAT_ID==batter))
c(coef(fit)[2], vcov(fit)[2, 2])
}
# find regression estimates for all players
SC <- t(sapply(unique(pbp.Ra$BAT_ID), one.fit))
rownames(SC) <- unique(pbp.Ra$BAT_ID)
SC.df <- data.frame(Name=rownames(SC), Effect=SC[, 1], SE=sqrt(SC[, 2]))
rownames(SC.df) <- NULL
# fit the random effects model -- output the estimates of mu and tau
# find improved estimates at the platoon effects for all players
fit <- laplace(normnormexch, c(0, 0),
cbind(SC.df[, 2], SC.df[, 3] ^ 2))
mu <- fit$mode[1]
tau <- exp(fit$mode[2])
SC.df$Estimate <- with(SC.df, (Effect / SE ^ 2 + mu / tau ^ 2) /
(1 / SE ^ 2 + 1 / tau ^ 2))
SC.df <- merge(SC.df, Master[, c("retroID", "nameLast")],
by.x="Name", by.y="retroID")
# construct a Cleveland-style dotplot
# of the observed and improved platoon effects
myplot <- ggplot(SC.df, aes(Effect, nameLast)) +
geom_point() +
geom_point(aes(Estimate, nameLast), color="red") +
geom_vline(xintercept = mu, color="blue") +
ggtitle(paste("2014 Platoon Effects for wOBA -- Batting",
side)) +
xlab("wOBA advantage") + ylab("Name")
print(myplot)
c(mu=mu, tau=tau)
}
siteffects(pbp.b, "R")
siteffects(pbp.b, "L")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment