# Source: GitHub gist bayesball/4901468d7ceda8489da57ec26fe42457
# Author: @bayesball
# Created: April 3, 2016 01:31
###############################
# Getting 2015 Retrosheet data
# and computing run expectancies,
# assuming the Chadwick files are installed.
#
# This section builds `d2015`, the play-by-play object that the rest of the
# script summarises and plots.
# devtools is attached for source_gist().
library(devtools)
# All downloads and parsing happen relative to this directory.
setwd("~/Desktop/retrosheet")
# Source two helper gists into the session.
# NOTE(review): presumably these define parse.retrosheet2.pbp() and
# compute.runs.expectancy(), which are not defined anywhere in this file --
# confirm against the gists.
source_gist("https://gist.github.com/bayesball/8892981")
source_gist("https://gist.github.com/bayesball/8892999")
# NOTE(review): presumably downloads and parses the 2015 play-by-play files
# into download.folder/unzipped -- confirm against the helper's source.
parse.retrosheet2.pbp(2015)
# Relative path: resolved against the directory set above, assuming the parse
# step did not change it. The working directory is not restored afterwards,
# so it stays changed for the remainder of the script.
setwd("download.folder/unzipped")
# Result of the run-expectancy helper for 2015; every later step reads it.
d2015 <- compute.runs.expectancy(2015)
####################################
# saved in d2015.Rdata
#####################################
library(dplyr)
library(ggplot2)
# Add three derived columns to every play:
#   BAT_TEAM_ID - code of the team at bat
#   League      - "AL" or "NL", looked up from BAT_TEAM_ID
#   BAT_ORDER   - BAT_LINEUP_ID as a factor, so plots treat it as categorical
d2015 <- d2015 %>%
  mutate(
    # When BAT_HOME_ID is non-zero the team code is the first three characters
    # of GAME_ID; when it is zero the code comes from AWAY_TEAM_ID.
    BAT_TEAM_ID = ifelse(
      BAT_HOME_ID != 0,
      substr(as.character(GAME_ID), 1, 3),
      as.character(AWAY_TEAM_ID)
    ),
    # Any team code not in this list is labelled "NL".
    League = ifelse(
      BAT_TEAM_ID %in% c(
        "ANA", "BAL", "BOS", "CHA", "CLE",
        "DET", "HOU", "KCA", "MIN", "NYA",
        "OAK", "SEA", "TBA", "TEX", "TOR"
      ),
      "AL",
      "NL"
    ),
    BAT_ORDER = factor(BAT_LINEUP_ID)
  )
# Keep only the plays whose BUNT_FL flag is TRUE.
bunts <- d2015 %>%
  filter(BUNT_FL == TRUE)

# what teams are bunting?
# Tabulate bunts per team in ascending order of count, then reuse that order
# as the factor levels so the bars are drawn sorted by frequency.
teams <- sort(table(bunts[["BAT_TEAM_ID"]]))
bunts[["BAT_TEAM"]] <- factor(
  bunts[["BAT_TEAM_ID"]],
  levels = names(teams)
)

# One bar per team, filled by league; flipped so team codes read horizontally.
ggplot(data = bunts, mapping = aes(x = BAT_TEAM, fill = factor(League))) +
  geom_bar() +
  coord_flip() +
  labs(title = "Number of Team Bunts, 2015 Season")
# who is bunting? Look at batting order position
# Bar chart of bunt counts for each level of BAT_ORDER.
ggplot(data = bunts, mapping = aes(x = BAT_ORDER)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Team Bunts by Bat Order Position")
# are there specific players who like to bunt?
# Per-batter bunt summary, restricted to batters with at least 20 bunts:
#   N            - number of bunt plays
#   SH           - bunt plays flagged by SH_FL
#   H            - bunt plays with a positive H_FL
#   Success_Rate - percentage of bunts that ended as SH or H, rounded
S <- bunts %>%
  group_by(BAT_ID) %>%
  summarize(
    N = n(),
    SH = sum(SH_FL),
    # Count each hit once. `H_FL > 0` gives the same total as sum(H_FL) when
    # the column is a 0/1 flag, and stays a count if it holds a hit-value code.
    # NOTE(review): Retrosheet event files code the hit field 0-4 -- confirm.
    H = sum(H_FL > 0),
    Success_Rate = round(100 * (H + SH) / N)
  ) %>%
  filter(N > 19)

# Base read.csv() is used because this script attaches only devtools, dplyr
# and ggplot2; readr's read_csv() is not on the search path. Strings are kept
# as character so retroID can be joined against BAT_ID.
master <- read.csv(
  "~/Desktop/STUFF/Lahman2015/Master.csv",
  stringsAsFactors = FALSE
)

# Attach first and last names by matching BAT_ID to the retroID column.
Top <- inner_join(
  S,
  select(master, retroID, nameFirst, nameLast),
  by = c("BAT_ID" = "retroID")
)

# Print the table, most frequent bunters first.
Top %>%
  select(nameFirst, nameLast, N, SH, H, Success_Rate) %>%
  arrange(desc(N))
# when are they bunting?
# Inning?
# Bunt counts per inning, keeping only plays with INN_CT below 10.
bunts %>%
  filter(INN_CT < 10) %>%
  ggplot(mapping = aes(x = factor(INN_CT))) +
  geom_bar() +
  coord_flip() +
  labs(title = "Team Bunts by Inning")
# game state (bases occupied and outs)?
# Per-STATE bunt summary:
#   N            - number of bunt plays in that state
#   SH           - plays where SH_FL is TRUE
#   H            - plays with a positive H_FL
#   Success_Rate - percentage of bunts that ended as SH or H, rounded
#   Type         - "Sacrifice" when SH outnumbers H, otherwise "Hit"
S <- bunts %>%
  group_by(STATE) %>%
  summarize(
    N = n(),
    SH = sum(SH_FL == TRUE),
    # Count each hit once, so that neither Success_Rate nor the SH-vs-H
    # comparison below is inflated if H_FL holds a hit-value code rather than
    # a 0/1 flag. The total is unchanged when H_FL is already a 0/1 flag.
    # NOTE(review): Retrosheet event files code the hit field 0-4 -- confirm.
    H = sum(H_FL > 0),
    Success_Rate = round(100 * (SH + H) / N),
    Type = ifelse(SH > H, "Sacrifice", "Hit")
  )

# Only states with at least 50 bunts are plotted.
S50 <- filter(S, N >= 50)

ggplot(S50, aes(STATE, Success_Rate, color = Type)) +
  geom_point(size = 3) +
  coord_flip() +
  ggtitle("Success Rates for Different Base/Out Situations")
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment