Skip to content

Instantly share code, notes, and snippets.

@rj-reilly
Forked from bayesball/batter.matchup.ggplot.R
Created December 16, 2016 20:23
Show Gist options
  • Save rj-reilly/4760f43d83af998e16f08ae016550621 to your computer and use it in GitHub Desktop.
Save rj-reilly/4760f43d83af998e16f08ae016550621 to your computer and use it in GitHub Desktop.
Finds estimates of true batting averages for all batters against a specific pitcher.
# Load the Retrosheet play-by-play data for the seasons 1960 through 2013 from
# the author's website. Each call downloads one .Rdata file over the network
# and restores its contents into the current workspace.
# batter.matchup.ggplot() below assumes the data frames pbp.60.79, pbp.80.99
# and pbp.00.13 are in the workspace -- presumably one per file; confirm after
# loading (e.g. with ls()).
load(url("http://bayes.bgsu.edu/baseball/pbp.1960.1979.Rdata"))
load(url("http://bayes.bgsu.edu/baseball/pbp.1980.1999.Rdata"))
load(url("http://bayes.bgsu.edu/baseball/pbp.2000.2013.Rdata"))
# Estimate smoothed ("true") batting averages for every batter who faced one
# pitcher.
#
# Each batter's at-bats (AB) and hits (H) against the pitcher are tallied from
# the three play-by-play data frames, an exchangeable beta-binomial model is
# fitted with laplace(betabinexch, ...) from LearnBayes, and every observed
# average is shrunk toward the common mean `eta` with precision `K`:
#     Estimate = (H + K * eta) / (AB + K)
#
# Arguments:
#   Name    - the pitcher, either as "First Last" (looked up in the Lahman
#             `Master` table) or, when `retroid` is TRUE, as a Retrosheet id.
#   graph   - if TRUE (default) print the fitted eta and K, draw the scatter
#             plot, print the head and tail of the table and return the table;
#             if FALSE return only the fitted parameters.
#   retroid - set to TRUE when `Name` already is a Retrosheet id.
#
# Returns:
#   graph = TRUE : the per-batter table (BAT_ID, AB, H, Batter.Name, Estimate,
#                  Num), sorted by decreasing Estimate.
#   graph = FALSE: the numeric vector c(K, eta).
#
# Stops with an informative error when a required package is missing, when the
# pitcher cannot be found in `Master`, or when the play-by-play data contain no
# at-bats against the pitcher.
#
# This function assumes the data frames pbp.60.79, pbp.80.99, and pbp.00.13
# are in the workspace.
batter.matchup.ggplot <- function(Name, graph = TRUE, retroid = FALSE) {
  # library() attaches the package just like the former require() calls did,
  # but fails immediately instead of returning FALSE and letting the function
  # run into an obscure "could not find function" error later.
  library(LearnBayes)
  library(dplyr)
  library(Lahman)

  # Fit the exchangeable beta-binomial model. The mode returned by laplace()
  # is transformed back with the inverse logit (eta) and exp (K).
  fit_model <- function(y, n) {
    fit <- laplace(betabinexch, c(1, 1), cbind(y, n))$mode
    eta <- exp(fit[1]) / (1 + exp(fit[1]))
    K <- exp(fit[2])
    list(K = K, eta = eta, Estimate = (y + K * eta) / (n + K))
  }

  # NOTE(review): newer Lahman releases reportedly ship this table under the
  # name `People`; confirm `Master` exists in the installed version.
  if (!retroid) {
    # Match on the full "First Last" string first so that names with more
    # than two words can be found ...
    full_name <- trimws(Name)
    pitcher <- filter(Master, paste(nameFirst, nameLast) == full_name)
    if (nrow(pitcher) == 0) {
      # ... and fall back to the original rule (first token = first name,
      # second token = last name) so existing calls keep resolving.
      name_tokens <- unlist(strsplit(Name, split = " "))
      pitcher <- filter(Master,
                        nameLast == name_tokens[2],
                        nameFirst == name_tokens[1])
    }
    pitcher_ids <- as.character(pitcher$retroID)
    pitcher_ids <- pitcher_ids[!is.na(pitcher_ids) & nzchar(pitcher_ids)]
    if (length(pitcher_ids) == 0) {
      stop("No player named '", Name, "' with a Retrosheet id was found in ",
           "the Lahman Master table.", call. = FALSE)
    }
    # Several players can share a name; as before, the first match is used.
    retro.id <- pitcher_ids[1]
  } else {
    retro.id <- Name
  }

  # Batting events against this pitcher in one play-by-play data frame.
  events_against <- function(pbp) {
    filter(pbp, PIT_ID == retro.id, BAT_EVENT_FL == TRUE)
  }
  d <- rbind(events_against(pbp.60.79),
             events_against(pbp.80.99),
             events_against(pbp.00.13))

  # At-bats and hits per batter; batters without an official at-bat are
  # dropped because they carry no information about a batting average.
  S <- summarize(group_by(d, BAT_ID),
                 AB = sum(AB_FL), H = sum(H_FL >= 1))
  S <- filter(S, AB > 0)
  if (nrow(S) == 0) {
    stop("No at-bats against pitcher '", retro.id, "' were found in the ",
         "play-by-play data.", call. = FALSE)
  }

  # Vectorised name lookup. Batters missing from Master keep their Retrosheet
  # id as the label, so the column always has exactly one string per row.
  batter_ids <- as.character(S$BAT_ID)
  master_row <- match(batter_ids, as.character(Master$retroID))
  S$Batter.Name <- ifelse(
    is.na(master_row),
    batter_ids,
    paste(Master$nameFirst[master_row], Master$nameLast[master_row])
  )

  M <- fit_model(S$H, S$AB)
  if (!graph) {
    return(c(M$K, M$eta))
  }

  print(c(eta = M$eta, K = M$K))
  S$Estimate <- M$Estimate
  S <- S[order(S$Estimate, decreasing = TRUE), ]
  n_batters <- nrow(S)
  S$Num <- seq_len(n_batters)

  library(ggplot2)
  # Points: smoothed average against at-bats. Red line: the pooled observed
  # average sum(H) / sum(AB). Labels: the two highest and two lowest estimates.
  pt <- ggplot(S, aes(AB, Estimate)) + geom_point(size = 3) +
    geom_hline(aes(yintercept = sum(H) / sum(AB)),
               color = "red", size = 2) +
    ggtitle(paste(Name, "Smoothed Averages")) +
    theme(plot.title = element_text(size = rel(3),
                                    color = "blue")) +
    theme(axis.title = element_text(size = rel(2))) +
    geom_text(data = subset(S, Num < 3 | Num > n_batters - 2),
              aes(AB, Estimate, label = Batter.Name),
              color = "red")
  print(pt)
  print(head(S))
  print(tail(S))
  S
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment