Created
April 3, 2016 01:31
-
-
Save bayesball/4901468d7ceda8489da57ec26fe42457 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---- Step 1: build the 2015 play-by-play data ----------------------
# Downloads and parses the 2015 Retrosheet event files, then computes
# run expectancies for every play.
# Prerequisite: the Chadwick command-line tools must be installed.
# NOTE(review): parse.retrosheet2.pbp() and compute.runs.expectancy()
# are presumably defined by the two gists sourced below -- confirm.
library(devtools)

# Work from the local Retrosheet folder.
setwd("~/Desktop/retrosheet")

source_gist("https://gist.github.com/bayesball/8892981")
source_gist("https://gist.github.com/bayesball/8892999")

parse.retrosheet2.pbp(2015)

# Relative path: resolved against the folder selected above.
setwd("download.folder/unzipped")

d2015 <- compute.runs.expectancy(2015)
# ---- Step 2: derived columns (d2015 was saved in d2015.Rdata) ------
library(dplyr)
library(ggplot2)

# Add three columns to every play:
#   BAT_TEAM_ID - batting team: the visiting team when BAT_HOME_ID is 0,
#                 otherwise the first three characters of GAME_ID
#   League      - "AL" if the batting team is one of the 15 listed
#                 codes, "NL" otherwise
#   BAT_ORDER   - the lineup slot (BAT_LINEUP_ID) as a factor
# League is built from BAT_TEAM_ID, so BAT_TEAM_ID is created first.
d2015 <- d2015 %>%
  mutate(
    BAT_TEAM_ID = ifelse(
      BAT_HOME_ID == 0,
      as.character(AWAY_TEAM_ID),
      substr(as.character(GAME_ID), 1, 3)
    )
  ) %>%
  mutate(
    League = ifelse(
      BAT_TEAM_ID %in% c(
        "OAK", "BOS", "TBA", "NYA", "BAL",
        "HOU", "CHA", "KCA", "SEA", "DET",
        "ANA", "MIN", "TOR", "CLE", "TEX"
      ),
      "AL",
      "NL"
    ),
    BAT_ORDER = factor(BAT_LINEUP_ID)
  )
# ---- Which teams are bunting? --------------------------------------
# Keep only the plays flagged as bunts.
bunts <- d2015 %>%
  filter(BUNT_FL == TRUE)

# Bunt counts per batting team, sorted ascending; the sorted names
# become the factor levels so the bars are drawn in count order.
teams <- sort(table(bunts$BAT_TEAM_ID))
bunts <- mutate(
  bunts,
  BAT_TEAM = factor(BAT_TEAM_ID, levels = names(teams))
)

# Horizontal bar chart of bunts per team, filled by league.
bunts %>%
  ggplot(aes(x = BAT_TEAM, fill = factor(League))) +
  geom_bar() +
  coord_flip() +
  labs(title = "Number of Team Bunts, 2015 Season")
# ---- Who is bunting? -----------------------------------------------
# Horizontal bar chart of bunt counts by batting-order position.
bunts %>%
  ggplot(aes(x = BAT_ORDER)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Team Bunts by Bat Order Position")
# ---- Are there specific players who like to bunt? ------------------
# Per-batter summary of bunt attempts:
#   N            - number of bunt plays
#   SH           - bunts flagged as sacrifice hits (SH_FL)
#   H            - bunts flagged as hits (H_FL)
#   Success_Rate - percentage of bunts that were a hit or a sacrifice,
#                  rounded to a whole number
# Only batters with at least 20 bunts (N > 19) are kept.
S <- bunts %>%
  group_by(BAT_ID) %>%
  summarize(
    N = n(),
    SH = sum(SH_FL),
    H = sum(H_FL),
    Success_Rate = round(100 * (H + SH) / N)
  ) %>%
  filter(N > 19)

# Player names come from the Lahman Master table.  The original call
# used read_csv(), but readr is never attached in this script, so it
# stopped with "could not find function".  Base read.csv() needs no
# extra package; stringsAsFactors = FALSE keeps retroID as character
# so it can be used as the join key on every R version.
master <- read.csv(
  "~/Desktop/STUFF/Lahman2015/Master.csv",
  stringsAsFactors = FALSE
)

# Attach first/last names by matching the batter ID to retroID.
Top <- inner_join(
  S,
  select(master, retroID, nameFirst, nameLast),
  by = c("BAT_ID" = "retroID")
)

# Print the most frequent bunters first.
Top %>%
  select(nameFirst, nameLast, N, SH, H, Success_Rate) %>%
  arrange(desc(N))
# ---- When are they bunting?  By inning -----------------------------
# Horizontal bar chart of bunt counts per inning; plays with
# INN_CT of 10 or more are left out.
bunts %>%
  filter(INN_CT < 10) %>%
  ggplot(aes(x = factor(INN_CT))) +
  geom_bar() +
  coord_flip() +
  labs(title = "Team Bunts by Inning")
# ---- When are they bunting?  By game state (bases and outs) --------
# One row per STATE value with:
#   N            - number of bunts in that state
#   SH, H        - counts of sacrifice hits and hits
#   Success_Rate - rounded percentage of bunts that were a hit or
#                  a sacrifice
#   Type         - "Sacrifice" when sacrifices outnumber hits,
#                  otherwise "Hit"
S <- bunts %>%
  group_by(STATE) %>%
  summarize(
    N = n(),
    SH = sum(SH_FL == TRUE),
    H = sum(H_FL),
    Success_Rate = round(100 * (SH + H) / N),
    Type = ifelse(SH > H, "Sacrifice", "Hit")
  )

# Restrict the plot to states with at least 50 bunts.
S50 <- S %>%
  filter(N >= 50)

S50 %>%
  ggplot(aes(x = STATE, y = Success_Rate, color = Type)) +
  geom_point(size = 3) +
  coord_flip() +
  labs(title = "Success Rates for Different Base/Out Situations")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment