Last active
April 14, 2019 10:32
-
-
Save bayesball/9db4998b1c8030b1fc42 to your computer and use it in GitHub Desktop.
R code to estimate platoon ability distribution using random effects model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# code for estimating platoon effects for 2014 season | |
# one needs Retrosheet play-by-play for this season | |
# also uses the Master file in the Lahman database to get the player batting sides | |
# last, one needs a file from fangraphs to compute the weights in the wOBA formula | |
library(dplyr) | |
# retrieved wOBA weights from http://www.fangraphs.com/guts.aspx?type=cn | |
# stored in file fangraphs.csv | |
fangraphs <- read.csv("~/Dropbox/2014 WORK/situational/fangraphs.csv") | |
wt <- filter(fangraphs, Season==2014) | |
# load in 2014 retrosheet data and limit to batting events | |
# record the different batting events | |
load("~/OneDriveBusiness/Retrosheet/pbp.2014.Rdata") | |
filter(pbp.14, BAT_EVENT_FL==TRUE) %>% | |
mutate(uBB=(EVENT_CD==14), HBP=(EVENT_CD==16), | |
X1B=(EVENT_CD==20), X2B=(EVENT_CD==21), | |
X3B=(EVENT_CD==22), HR=(EVENT_CD==23), | |
SF=(SF_FL==TRUE), AB=(AB_FL==TRUE)) -> pbp.b | |
# compute value of each PA using fangraphs weights | |
filter(pbp.b, AB + uBB + SF + HBP > 0) %>% | |
mutate(Y = wt$wBB * uBB + wt$wHBP * HBP + | |
wt$w1B * X1B + wt$w2B * X2B + wt$w3B * X3B + | |
wt$wHR * HR) -> pbp.b | |
# add in batting side information from Lahman's database | |
Master <- read.csv("~/OneDriveBusiness/lahman-csv_2015-01-24/Master.csv") | |
pbp.b <- merge(pbp.b, Master, by.x="BAT_ID", by.y="retroID") | |
siteffects <- function(pbp, side, N=150){ | |
require(LearnBayes) | |
require(dplyr) | |
require(tidyr) | |
require(ggplot2) | |
# define the opposite side variable | |
pbp.R <- filter(pbp, bats==side) | |
opp.side <- ifelse(side=="R", "L", "R") | |
pbp.R$Opposite <- with(pbp.R, ifelse(PIT_HAND_CD==opp.side, 1, 0)) | |
# find the number of PA against each side; only keep the | |
# data for players who had at least N appearances against each side | |
summarize(group_by(pbp.R, BAT_ID, Opposite), N=length(Y)) %>% | |
spread(Opposite, N) -> S | |
names(S)[2:3] <- c("N.same", "N.opposite") | |
merge(pbp.R, S, by="BAT_ID") %>% | |
filter(N.same >= N, N.opposite >= N) -> pbp.Ra | |
# regression fit for a single batter, return the estimate of slope | |
# and the corresponding standard error | |
one.fit <- function(batter){ | |
fit <- lm(Y ~ Opposite, data=filter(pbp.Ra, BAT_ID==batter)) | |
c(coef(fit)[2], vcov(fit)[2, 2]) | |
} | |
# find regression estimates for all players | |
SC <- t(sapply(unique(pbp.Ra$BAT_ID), one.fit)) | |
rownames(SC) <- unique(pbp.Ra$BAT_ID) | |
SC.df <- data.frame(Name=rownames(SC), Effect=SC[, 1], SE=sqrt(SC[, 2])) | |
rownames(SC.df) <- NULL | |
# fit the random effects model -- output the estimates of mu and tau | |
# find improved estimates at the platoon effects for all players | |
fit <- laplace(normnormexch, c(0, 0), | |
cbind(SC.df[, 2], SC.df[, 3] ^ 2)) | |
mu <- fit$mode[1] | |
tau <- exp(fit$mode[2]) | |
SC.df$Estimate <- with(SC.df, (Effect / SE ^ 2 + mu / tau ^ 2) / | |
(1 / SE ^ 2 + 1 / tau ^ 2)) | |
SC.df <- merge(SC.df, Master[, c("retroID", "nameLast")], | |
by.x="Name", by.y="retroID") | |
# construct a Cleveland-style dotplot | |
# of the observed and improved platoon effects | |
myplot <- ggplot(SC.df, aes(Effect, nameLast)) + | |
geom_point() + | |
geom_point(aes(Estimate, nameLast), color="red") + | |
geom_vline(xintercept = mu, color="blue") + | |
ggtitle(paste("2014 Platoon Effects for wOBA -- Batting", | |
side)) + | |
xlab("wOBA advantage") + ylab("Name") | |
print(myplot) | |
c(mu=mu, tau=tau) | |
} | |
siteffects(pbp.b, "R") | |
siteffects(pbp.b, "L") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment