Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active December 16, 2016 20:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bayesball/2b70b09a447a3d06188b to your computer and use it in GitHub Desktop.
Save bayesball/2b70b09a447a3d06188b to your computer and use it in GitHub Desktop.
Finds estimates of true batting averages for all batters against a specific pitcher.
# Load Retrosheet play-by-play data for the seasons 1960 through 2013 from the
# author's website. Each load() call restores the objects stored in one remote
# .Rdata file into the current environment; network access is required.
# The function below expects these files to supply the data frames pbp.60.79,
# pbp.80.99 and pbp.00.13 (one per file, in this order -- presumably; verify
# with ls() after loading).
load(url("http://bayes.bgsu.edu/baseball/pbp.1960.1979.Rdata"))
load(url("http://bayes.bgsu.edu/baseball/pbp.1980.1999.Rdata"))
load(url("http://bayes.bgsu.edu/baseball/pbp.2000.2013.Rdata"))
# Estimate smoothed ("true") batting averages for every batter who faced one
# pitcher, by fitting a beta-binomial exchangeable model to the hits / at-bats
# of each batter and shrinking the observed averages toward a common value.
#
# This function assumes the data frames pbp.60.79, pbp.80.99, and pbp.00.13 are
# in the workspace.
#
# Arguments:
#   Name    - the pitcher, given as "First Last" (exactly two space-separated
#             tokens are used), or as a Retrosheet id when retroid = TRUE.
#   graph   - if TRUE, print the fitted eta and K, draw a scatterplot of the
#             smoothed estimate against at-bats (labelling the two highest and
#             two lowest estimates), print the head and tail of the table and
#             return the table sorted by decreasing estimate.
#             If FALSE, return c(K, eta) only.
#   retroid - set to TRUE when Name already is a Retrosheet id.
#
# Stops with an explicit error when a required package is missing, when the
# pitcher name cannot be found, or when no at-bats against the pitcher exist.
batter.matchup.ggplot <- function(Name, graph=TRUE, retroid=FALSE){
  # Attach a package or fail immediately; a bare require() only returns FALSE,
  # which would surface later as an unrelated "object not found" error.
  need <- function(pkg){
    if (!require(pkg, character.only = TRUE)) {
      stop("package '", pkg, "' is required but is not installed",
           call. = FALSE)
    }
  }

  # Fit the exchangeable model to hits y out of n at-bats.  The posterior mode
  # is back-transformed below with an inverse logit (eta) and exp (K); each
  # batter's estimate is the observed average shrunk toward eta, with K acting
  # as the weight of the common value.
  fit.model <- function(y, n){
    fit <- laplace(betabinexch, c(1, 1), cbind(y, n))$mode
    eta <- exp(fit[1]) / (1 + exp(fit[1]))
    K <- exp(fit[2])
    Estimate <- (y + K * eta) / (n + K)
    list(K = K, eta = eta, Estimate = Estimate)
  }

  need("LearnBayes")
  need("dplyr")
  need("Lahman")

  # NOTE(review): newer Lahman releases appear to ship the player table as
  # `People` instead of `Master` -- confirm against the installed version.
  # Try `People` first and fall back to `Master` so both layouts work.
  players <- tryCatch(Lahman::People, error = function(e) Lahman::Master)

  if (!retroid) {
    name <- unlist(strsplit(Name, split = " "))
    is.pitcher <- players$nameLast == name[2] & players$nameFirst == name[1]
    # which() drops NA comparisons; the first matching player is used.
    retro.id <- as.character(players$retroID[which(is.pitcher)][1])
    if (is.na(retro.id) || !nzchar(retro.id)) {
      stop("no Retrosheet id found for player '", Name, "'", call. = FALSE)
    }
  } else {
    retro.id <- Name
  }

  # Keep only the batting events thrown by this pitcher, across all seasons.
  d1 <- filter(pbp.60.79, PIT_ID == retro.id, BAT_EVENT_FL == TRUE)
  d2 <- filter(pbp.80.99, PIT_ID == retro.id, BAT_EVENT_FL == TRUE)
  d3 <- filter(pbp.00.13, PIT_ID == retro.id, BAT_EVENT_FL == TRUE)
  d <- rbind(d1, d2, d3)

  # One row per batter: at-bats and hits (any event with H_FL >= 1).
  S <- summarize(group_by(d, BAT_ID),
                 AB = sum(AB_FL), H = sum(H_FL >= 1))
  S <- filter(S, AB > 0)
  if (nrow(S) == 0) {
    stop("no at-bats found against pitcher '", Name, "'", call. = FALSE)
  }

  # Vectorised name lookup.  match() takes the first player row per id, and a
  # batter missing from the player table is labelled with the raw id, so the
  # result is always one string per batter.
  idx <- match(as.character(S$BAT_ID), as.character(players$retroID),
               incomparables = NA)
  full.name <- paste(players$nameFirst[idx], players$nameLast[idx])
  S$Batter.Name <- ifelse(is.na(idx), as.character(S$BAT_ID), full.name)

  M <- fit.model(S$H, S$AB)

  if (graph == TRUE) {
    print(c(eta = M$eta, K = M$K))
    S$Estimate <- M$Estimate
    S <- S[order(S$Estimate, decreasing = TRUE), ]
    n.batters <- nrow(S)
    S$Num <- seq_len(n.batters)
    need("ggplot2")
    # Pooled batting average against this pitcher, drawn as a single line
    # (mapping it through aes() would draw one overplotted line per row).
    overall <- sum(S$H) / sum(S$AB)
    pt <- ggplot(S, aes(AB, Estimate)) + geom_point(size = 3) +
      geom_hline(yintercept = overall,
                 color = "red", size = 2) +
      ggtitle(paste(Name, "Smoothed Averages")) +
      theme(plot.title = element_text(size = rel(3),
                                      color = "blue")) +
      theme(axis.title = element_text(size = rel(2))) +
      # Label only the two highest and two lowest smoothed estimates.
      geom_text(data = subset(S, Num < 3 | Num > n.batters - 2),
                aes(AB, Estimate, label = Batter.Name),
                color = "red")
    print(pt)
    print(head(S))
    print(tail(S))
    S
  } else {
    c(M$K, M$eta)
  }
}
@bayesball
Copy link
Author

This function assumes that the Retrosheet data frames pbp.60.79, pbp.80.99, and pbp.00.13 are in the current workspace. The data frame pbp.60.79 contains play-by-play data for the seasons 1960 through 1979, pbp.80.99 contains data for seasons 1980 through 1999, and pbp.00.13 contains data for seasons 2000 through 2013.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment