Created
March 24, 2020 17:18
-
-
Save bayesball/0f2950320f4093827c3e334656f88153 to your computer and use it in GitHub Desktop.
BA on Balls in Play, Launch Conditions, and Random Effects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load some packages | |
library(tidyverse) | |
library(CalledStrike) | |
library(mgcv) | |
library(metR) | |
# read in the Statcast data | |
statcast2019 <- read_csv("~/Dropbox/2016 WORK/BLOG Baseball R/OTHER/StatcastData/statcast2019.csv") | |
# only consider balls in play | |
statcast2019 %>% | |
filter(type == "X") -> scip | |
hits <- c("single", "double", "triple", | |
"home_run") | |
# define a hit variable and exclude home runs | |
scip %>% | |
mutate(H = ifelse(events %in% hits, 1, 0)) %>% | |
filter(events != "home_run") -> | |
scip | |
# choose random sample of 100 players | |
# with at least 300 in play | |
scip %>% | |
group_by(player_name) %>% | |
summarize(N = n()) -> S | |
S300 <- filter(S, N >= 300) | |
set.seed(1234) | |
players <- sample(S300$player_name, | |
size = 100, | |
replace = FALSE) | |
# here is the dataset for only these players | |
scip_new <- filter(scip, | |
player_name %in% players) | |
# fit model to LS, LA | |
fit <- gam(H ~ s(launch_angle, launch_speed), | |
data = scip_new, | |
family = binomial) | |
# create a fancy graph of the predicted | |
# hit probabilities | |
df <- expand.grid(launch_angle = seq(-10, 50, length = 50), | |
launch_speed = seq(50, 115, length = 50)) | |
df$Probability <- predict(fit, df, type = "response") | |
BR <- seq(0.025, 0.975, by = 0.05) | |
ggplot(df) + geom_contour_fill(aes(x = launch_angle, | |
y = launch_speed, | |
z = Probability), | |
breaks = BR, | |
size = 1.5) + | |
scale_fill_distiller(palette = "Spectral") + | |
ggtitle("Probability of Hit of Balls in Play") + | |
centertitle() + | |
increasefont() + | |
labs(fill = "Prob(Hit)") + | |
xlab("Launch Angle (degrees)") + | |
ylab("Launch Speed (mph)") | |
# fitting the Regression random effects model | |
scip_new$Player <- factor(scip_new$player_name) | |
fit2 <- gam(H ~ s(launch_angle, launch_speed) + | |
s(Player, bs = "re"), | |
data = scip_new, | |
family = binomial) | |
# collect the random effects estimates | |
RE2 <- coef(fit2)[31:130] | |
# fitting the Constant random effects model | |
scip_new$Player <- factor(scip_new$player_name) | |
fit3 <- gam(H ~ s(Player, bs = "re"), | |
data = scip_new, | |
family = binomial) | |
# collect the random effects estimates | |
RE3 <- coef(fit3)[-1] | |
Common <- coef(fit3)[1] | |
gam.vcomp(fit3) | |
# graph showing the shrinkage | |
# plot probs (reff model) against observed BABIP | |
scip_new %>% | |
group_by(Player) %>% | |
summarize(N = n(), HITS = sum(H)) -> S3 | |
S3$RE3 <- RE3 | |
S3$Prob <- exp(S3$RE + Common) / | |
(1 + exp(S3$RE + Common)) | |
ggplot(S3, aes(HITS / N, Prob, label = Player)) + | |
geom_point() + | |
geom_abline(slope = 1, intercept = 0, | |
color = "red") + | |
xlim(.2, .425) + | |
geom_label(data = filter(S3, | |
Prob > .33 | Prob < .27 )) + | |
xlab("Observed BABIP") + | |
ylab("Probability Estimate") + | |
increasefont() + | |
centertitle() + | |
ggtitle("Estimates Using a Random Effects Model") | |
# scatterplot of two sets of random effects | |
S3$RE2 <- RE2 | |
S3 %>% arrange(RE) %>% View() | |
ggplot(S3, aes(RE3, RE2, label = Player)) + | |
geom_point() + | |
increasefont() + | |
ggtitle("Scatterplot of Two Sets of Random Effects") + | |
centertitle() + | |
xlim(-.15, .15) + | |
ylim(-.15, .15) + | |
xlab("Constant Model") + | |
ylab("Regression Model") + | |
geom_label(data = filter(S3, | |
RE3 > .1, | |
RE2 < .05 )) + | |
geom_label(data = filter(S3, | |
RE3 < 0, | |
RE2 > .05 )) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment