Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Daniel Murphy -- Learning about his home run ability and predicting his home run output in the 2015 World Series
# Daniel Murphy Exercise
# Part I -- learning about Murphy's home run ability
# and updating this knowledge after the NLDS and NLCS
library(ggplot2)
library(LearnBayes)
# career home run data for Murphy
seasons <- c(2008, 2009, 2011:2015)
hr <- c(2, 12, 6, 6, 13,9, 14)
ab <- c(131, 508, 391, 571, 658, 596, 499)
murphy <- data.frame(seasons, hr, ab)
# fitting exchangeable model to this data
fit <- laplace(betabinexch, c(1, 1), cbind(hr, ab))
K <- exp(fit$mode[2])
eta <- exp(fit$mode[1]) / (1 + exp(fit$mode[1]))
# use the estimated random effects distribution as my initial prior for P
new.post <- c(K * eta, K * (1 - eta)) # beta shape parameters
# predictive simulation -- simulate [P] [Y | P]
# Y = number of home runs hit in 21 AB in NLDS
p.new <- rbeta(10000, new.post[1], new.post[2])
y.new <- rbinom(10000, size=21, prob=p.new)
ggplot(data.frame(P=p.new), aes(P)) + geom_density() +
ggtitle("MY PRIOR")
ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() +
geom_vline(xintercept = 3, color="red", size=3) +
ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLDS")
# we now observe HR data from first round - 3 out of 21
# update my prior
new.post2 <- new.post + c(3, 21 - 3)
# predictive simulation
# Y = number of home runs hit in 17 AB in NLCS
p.new2 <- rbeta(10000, new.post2[1], new.post2[2])
y.new2 <- rbinom(10000, size=17, prob=p.new2)
# graph comparing initial prior and updated prior
dp <- rbind(data.frame(Type="Prior", P=p.new),
data.frame(Type="New Prior", P=p.new2))
ggplot(dp, aes(P, color=Type)) + geom_density()
ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() +
geom_vline(xintercept = 4, color="red", size=3) +
ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLCS")
# Part II - predicting number of HR for 2015 World Series
# update again to get my prior after the results of the two playoffs
new.post3 <- new.post2 + c(4, 17-4)
curve(dbeta(x, new.post3[1], new.post3[2]), 0, 0.07,
xlab="P", ylab="Density", main="My Prior at Beginning of 2015 WS")
# construct a prior on number of games and number of at-bats
# Here is data I used to find the distribution of Murphy's game AB
#library(XML)
#d <- readHTMLTable('http://www.baseball-reference.com/players/gl.cgi?id=murphda08&t=b&year=2015')
#gamelogs <- d[[3]]
atbats <- c(3, 4, 5)
atbats.probs <- c(16, 79, 19) / (16 + 79 + 19)
# Here is the distribution of the length of the WS assuming
# teams of equal ability
length.series <- 4:7
length.series.probs <- c(2 * .5 ^ 4, 2 * 4 * .5 ^ 5,
2 * 10 * .5 ^ 6, 2 * 20 * .5 ^ 7)
# do 10,000 simulations of predictive AB
r.length.series <- sample(length.series, size=10000,
prob=length.series.probs,
replace=TRUE)
get.ab <- function(j)
sum(sample(atbats, size=r.length.series[j],
prob=atbats.probs, replace=TRUE))
r.atbats <- sapply(1:1000, get.ab)
# ready to simulate home runs
p.new3 <- rbeta(10000, new.post3[1], new.post3[2])
y <- rbinom(10000, size=r.atbats, prob=p.new3)
# tabulate and graph the predictive distribution
# for the number of home runs
TB <- round(table(y) / 10000, 3)
TB
K <- max(as.numeric(names(TB)))
O <- data.frame(Number_of_Homeruns=0:K,
Probability=round(as.numeric(table(y)) / 10000, 3))
qplot(Number_of_Homeruns, Probability,
data = O, geom = "segment",
yend = 0, xend = Number_of_Homeruns, size = I(10)) +
ggtitle("Predictive Distribution of Number of Murphy Home Runs in WS")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment