Last active
April 4, 2019 19:54
-
-
Save bayesball/114d23f7498f2f69caca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###############################################
# Working with the complete set of 2015 play-by-play data,
# collected using the getData function in the openWAR
# package (which retrieves MLBAM GameDay files).
# The data are currently saved as an .Rdata file.
###############################################
# Load the saved play-by-play data. The rest of the script expects this file
# to supply a data frame named d2015.
load("alldata2015.Rdata")

# Compute the run value of every plate appearance.
library(dplyr)

# Flag base occupancy at the start (R*) and end (E*) of each play: a base is
# treated as occupied when its runner field is not NA.
d2015 <- mutate(d2015,
                R1B = !is.na(start1B),
                R2B = !is.na(start2B),
                R3B = !is.na(start3B),
                E1B = !is.na(end1B),
                E2B = !is.na(end2B),
                E3B = !is.na(end3B))

# Run expectancy table: mean of runsFuture for each (outs, bases) starting
# state. The outer parentheses print the table as it is assigned.
(runs_expectancy <- ungroup(
  summarize(group_by(d2015, startOuts, R1B, R2B, R3B),
            Runs = mean(runsFuture))))

# Attach the expectancy of the starting state. After the second join below,
# this column carries the suffix .x (Runs.x).
d2015 <- inner_join(d2015, runs_expectancy,
                    by = c("startOuts", "R1B", "R2B", "R3B"))

# Attach the expectancy of the ending state (Runs.y). The table is keyed on
# starting states only, so an inner join would silently discard every play
# whose ending state has no row there. A left join keeps those plays so they
# can be handled explicitly below.
d2015 <- left_join(d2015, runs_expectancy,
                   by = c("endOuts" = "startOuts",
                          "E1B" = "R1B",
                          "E2B" = "R2B",
                          "E3B" = "R3B"))

# Once the third out is made no further runs can score in the half-inning,
# so the ending-state expectancy is 0.
# NOTE(review): assumes endOuts == 3 marks an inning-ending play -- confirm
# against the data.
d2015 <- mutate(d2015, Runs.y = ifelse(endOuts == 3, 0, Runs.y))

# Any other ending state without a match is still dropped, as before.
d2015 <- filter(d2015, !is.na(Runs.y))

# Run value = change in run expectancy + runs scored on the play.
d2015 <- mutate(d2015,
                Runs = Runs.y - Runs.x + runsOnPlay)
###############################
# adjustment for ballpark
###############################

# Per-batter summary: mean run value (R) and number of rows (N).
S <- d2015 %>%
  group_by(batterName) %>%
  summarize(R = mean(Runs), N = n())

# Batting team for each play: the away team bats in the "top" half,
# the home team otherwise.
d2015 <- mutate(d2015,
                bat_team = ifelse(half == "top",
                                  as.character(away_team),
                                  as.character(home_team)))

# Assign each batter to the team he appears with most often.
TeamData <- d2015 %>%
  group_by(batterName) %>%
  summarize(Team = names(sort(table(bat_team), decreasing = TRUE))[1])

# Batter summary with the team label attached.
S1 <- inner_join(S, TeamData, by = "batterName")
# Plot mean run value against row count (N) for all players, with a smoother,
# and overlay the players whose Team is "col" (Rockies) in red.
library(ggplot2)

rockies_means <- filter(S1, Team == "col")

# Note: ylim() discards points outside the limits before geom_smooth() is
# fitted, so the smooth is based only on the displayed points.
ggplot(S1, aes(N, R)) +
  geom_point(alpha = .2) +
  ylim(c(-.15, .15)) +
  geom_smooth() +
  geom_point(data = rockies_means, aes(N, R), color = "red") +
  ggtitle("Mean Run Values for all Players in 2015 Season\nRockies Players in Red")
###### regress run values on bat team ids and compute residuals
# With no intercept, each coefficient is a batting-team mean of Runs, so the
# residual is the run value relative to that team's mean.
# na.exclude + residuals() pads rows that lm() drops for missing values with
# NA, so the result always has one entry per row of d2015. Assigning
# fit$residuals directly fails when any row is dropped.
fit <- lm(Runs ~ 0 + bat_team, data = d2015, na.action = na.exclude)
d2015$Residual <- residuals(fit)

# Per-batter mean residual and row count, with the team label attached.
R <- d2015 %>%
  group_by(batterName) %>%
  summarize(Residual = mean(Residual), N = n())
R1 <- inner_join(R, TeamData, by = "batterName")
# Plot mean residual against row count (N) for all players, with a smoother,
# and overlay the players whose Team is "col" (Rockies) in red.
rockies_residuals <- filter(R1, Team == "col")

# Note: ylim() discards points outside the limits before geom_smooth() is
# fitted, so the smooth is based only on the displayed points.
ggplot(R1, aes(N, Residual)) +
  geom_point(alpha = .2) +
  ylim(c(-.15, .15)) +
  geom_smooth() +
  geom_point(data = rockies_residuals, aes(N, Residual), color = "red") +
  ggtitle("Mean Residual Values Adjusted for Ballpark\nRockies Players in Red")
# Look at pitcher-arm and batter-side effects.
# Mean run value and row count for each batter within each
# (stand, throws) combination.
A <- d2015 %>%
  group_by(batterName, stand, throws) %>%
  summarize(Mean = mean(Runs), N = n())

# Readable label for each combination, used as the facet title.
A <- mutate(A, Platoon = paste(stand, "hitter against",
                               throws, "pitcher"))

# Overall mean per platoon situation: the batter means weighted by N.
A.mean <- A %>%
  group_by(Platoon) %>%
  summarize(Mean = sum(Mean * N) / sum(N))

# Graph mean runs against N for all players, one facet per platoon
# situation; the red line marks that situation's overall mean.
ggplot(A, aes(N, Mean)) +
  geom_point() +
  facet_wrap(~ Platoon, ncol = 2) +
  ylim(c(-.15, .15)) +
  geom_hline(yintercept = 0) +
  geom_hline(aes(yintercept = Mean), data = A.mean, color = "red") +
  ggtitle("Mean Run Values for all Players in 2015 Season\nPlatoon Effects")
# Remove the platoon effect: regress run values on the stand/throws
# combination (no intercept, so each coefficient is a combination mean) and
# keep the residuals.
d2015 <- mutate(d2015, platoon = factor(paste(stand, throws)))

# na.exclude + residuals() pads rows that lm() drops for missing values with
# NA, so the result always has one entry per row of d2015. Assigning
# fit2$residuals directly fails when any row is dropped.
fit2 <- lm(Runs ~ 0 + platoon, data = d2015, na.action = na.exclude)
d2015$Residual2 <- residuals(fit2)

# Mean residual and row count for each batter within each
# (stand, throws) combination.
A1 <- d2015 %>%
  group_by(batterName, stand, throws) %>%
  summarize(Mean = mean(Residual2), N = n())

# Readable label for each combination, used as the facet title.
A1 <- mutate(A1, Platoon = paste(stand, "hitter against", throws, "pitcher"))

# Overall mean residual per platoon situation: the batter means weighted by N.
A1.mean <- A1 %>%
  group_by(Platoon) %>%
  summarize(Mean = sum(Mean * N) / sum(N))

# Graph mean residuals against N for all players, one facet per platoon
# situation; the red line marks that situation's overall mean.
ggplot(A1, aes(N, Mean)) +
  geom_point() +
  facet_wrap(~ Platoon, ncol = 2) +
  ylim(c(-.15, .15)) +
  geom_hline(yintercept = 0) +
  geom_hline(aes(yintercept = Mean), data = A1.mean, color = "red") +
  ggtitle("Mean Residual Values for all Players in 2015 Season\nPlatoon Effects")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment