Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active April 4, 2019 19:54
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bayesball/114d23f7498f2f69caca to your computer and use it in GitHub Desktop.
Save bayesball/114d23f7498f2f69caca to your computer and use it in GitHub Desktop.
###############################################
# Works with the complete set of 2015 play-by-play data,
# collected using the getData function in the openWAR
# package (which retrieves MLBAM GameDay files).
# The data are currently saved in an .Rdata file (object d2015).
###############################################
# Load the saved play-by-play data; the rest of the script expects this file
# to provide a data frame named d2015.
load("alldata2015.Rdata")
# computes the run values of all plate appearances
library(dplyr)
# Flag base occupancy at the start (R*) and end (E*) of each play: a base is
# treated as occupied when the corresponding start/end column is not NA.
d2015 <- mutate(d2015,
                R1B = !is.na(start1B),
                R2B = !is.na(start2B),
                R3B = !is.na(start3B),
                E1B = !is.na(end1B),
                E2B = !is.na(end2B),
                E3B = !is.na(end3B))
# Run expectancy table: mean of runsFuture over all plays that begin in each
# (outs, runners) state. The outer parentheses print the table.
(runs_expectancy <- d2015 %>%
  group_by(startOuts, R1B, R2B, R3B) %>%
  summarize(Runs = mean(runsFuture)))
# Attach the expectancy of the starting state (column Runs).
d2015 <- inner_join(d2015, runs_expectancy,
                    by = c("startOuts", "R1B", "R2B", "R3B"))
# Attach the expectancy of the ending state. Both tables now carry a Runs
# column, so the join renames them Runs.x (start state) and Runs.y (end state).
# This must be a left join: a play that records the third out ends with
# endOuts == 3, which is presumably never a value of startOuts, so an inner
# join would silently drop every inning-ending play and bias all run values
# upward.
d2015 <- left_join(d2015, runs_expectancy,
                   by = c("endOuts" = "startOuts",
                          "E1B" = "R1B",
                          "E2B" = "R2B",
                          "E3B" = "R3B"))
# After the third out no more runs can score in the inning, so the end-state
# expectancy is zero. %in% is used so that an NA endOuts stays unmatched.
d2015 <- mutate(d2015, Runs.y = ifelse(endOuts %in% 3, 0, Runs.y))
# As with the original inner join, discard plays whose end state still has no
# expectancy value, so later steps never see NA run values from this join.
d2015 <- filter(d2015, !is.na(Runs.y))
# Run value of a play = change in run expectancy + runs scored on the play.
d2015 <- mutate(d2015,
                Runs = Runs.y - Runs.x + runsOnPlay)
###############################
# adjustment for ballpark
###############################
# look at batter means: mean run value (R) and number of rows (N) per batter
S <- d2015 %>%
  group_by(batterName) %>%
  summarize(R = mean(Runs), N = n())
# get team identifier for each player: the away team is used when
# half == "top", the home team otherwise
d2015 <- mutate(d2015,
                bat_team = ifelse(half == "top",
                                  as.character(away_team),
                                  as.character(home_team)))
# A batter can appear with more than one team; keep the team that occurs
# most often in his rows.
TeamData <- d2015 %>%
  group_by(batterName) %>%
  summarize(Team = names(sort(table(bat_team), decreasing = TRUE))[1])
S1 <- inner_join(S, TeamData, by = "batterName")
# plots mean run values against PA for all players
# with Rockie players identified
library(ggplot2)
ggplot(S1, aes(N, R)) +
  geom_point(alpha = .2) +
  geom_smooth() +
  geom_point(data = filter(S1, Team == "col"),
             aes(N, R), color = "red") +
  # Zoom with coord_cartesian() instead of ylim(): ylim() removes points
  # outside the range *before* the smoother is fitted, which distorts the
  # curve; coord_cartesian() only changes the visible window.
  coord_cartesian(ylim = c(-.15, .15)) +
  ggtitle("Mean Run Values for all Players in 2015 Season\nRockies Players in Red")
###### regress run values on bat team ids and computes residuals
# The no-intercept model fits one mean per batting team, so each residual is
# the play's run value minus its batting team's mean run value.
# NOTE(review): the factor is the batting team, not the home park -- confirm
# this is the intended proxy for a ballpark adjustment.
# na.exclude + residuals() pads rows that lm() drops for missing values with
# NA, so the residual vector always has one entry per row of d2015
# (fit$residuals would be shorter and the assignment would fail).
fit <- lm(Runs ~ 0 + bat_team, data = d2015, na.action = na.exclude)
d2015$Residual <- residuals(fit)
# mean residual and number of rows per batter
R <- d2015 %>%
  group_by(batterName) %>%
  summarize(Residual = mean(Residual), N = n())
R1 <- inner_join(R, TeamData, by = "batterName")
# plots mean residual values against PA for all players
# with Rockie players identified
ggplot(R1, aes(N, Residual)) +
  geom_point(alpha = .2) +
  geom_smooth() +
  geom_point(data = filter(R1, Team == "col"),
             aes(N, Residual), color = "red") +
  # Zoom with coord_cartesian() instead of ylim(): ylim() removes points
  # outside the range before the smoother is fitted.
  coord_cartesian(ylim = c(-.15, .15)) +
  ggtitle("Mean Residual Values Adjusted for Ballpark\nRockies Players in Red")
# Arm and batter side effects: for every batter, the mean run value and the
# number of rows in each (stand, throws) combination, with a readable
# Platoon label built from the two codes.
A <- d2015 %>%
  group_by(batterName, stand, throws) %>%
  summarize(Mean = mean(Runs), N = n()) %>%
  mutate(Platoon = paste(stand, "hitter against", throws, "pitcher"))
# Overall mean per platoon situation, weighting each batter's mean by his
# number of rows.
A.mean <- A %>%
  group_by(Platoon) %>%
  summarize(Mean = sum(Mean * N) / sum(N))
# One panel per platoon situation: batter means against N, a black reference
# line at zero and a red line at the situation's weighted mean.
ggplot(A, aes(N, Mean)) +
  geom_point() +
  facet_wrap(~ Platoon, ncol = 2) +
  ylim(c(-.15, .15)) +
  geom_hline(yintercept = 0) +
  geom_hline(aes(yintercept = Mean), data = A.mean, color = "red") +
  ggtitle("Mean Run Values for all Players in 2015 Season\nPlatoon Effects")
# Platoon factor: one level per (stand, throws) combination.
d2015 <- mutate(d2015, platoon = factor(paste(stand, throws)))
# The no-intercept model fits one mean per platoon level, so each residual is
# the play's run value minus the mean run value of its platoon situation.
# na.exclude + residuals() pads rows that lm() drops for missing values with
# NA, keeping the residuals aligned with the rows of d2015
# (fit2$residuals would be shorter and the assignment would fail).
fit2 <- lm(Runs ~ 0 + platoon, data = d2015, na.action = na.exclude)
d2015$Residual2 <- residuals(fit2)
# Mean residual and number of rows per batter and platoon situation.
A1 <- d2015 %>%
  group_by(batterName, stand, throws) %>%
  summarize(Mean = mean(Residual2), N = n()) %>%
  mutate(Platoon = paste(stand, "hitter against", throws, "pitcher"))
# Weighted mean residual per platoon situation (weights = rows per batter).
A1.mean <- A1 %>%
  group_by(Platoon) %>%
  summarize(Mean = sum(Mean * N) / sum(N))
# graphs mean residuals and PA for all players in four platoon
# situations
ggplot(A1, aes(N, Mean)) +
  geom_point() +
  facet_wrap(~ Platoon, ncol = 2) +
  ylim(c(-.15, .15)) +
  geom_hline(yintercept = 0) +
  geom_hline(aes(yintercept = Mean), data = A1.mean, color = "red") +
  ggtitle("Mean Residual Values for all Players in 2015 Season\nPlatoon Effects")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment