Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created March 24, 2020 17:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/0f2950320f4093827c3e334656f88153 to your computer and use it in GitHub Desktop.
Save bayesball/0f2950320f4093827c3e334656f88153 to your computer and use it in GitHub Desktop.
BA on Balls in Play, Launch Conditions, and Random Effects
# load some packages
library(tidyverse)
library(CalledStrike)
library(mgcv)
library(metR)
# read in the Statcast data
statcast2019 <- read_csv("~/Dropbox/2016 WORK/BLOG Baseball R/OTHER/StatcastData/statcast2019.csv")
# only consider balls in play
statcast2019 %>%
filter(type == "X") -> scip
hits <- c("single", "double", "triple",
"home_run")
# define a hit variable and exclude home runs
scip %>%
mutate(H = ifelse(events %in% hits, 1, 0)) %>%
filter(events != "home_run") ->
scip
# choose random sample of 100 players
# with at least 300 in play
scip %>%
group_by(player_name) %>%
summarize(N = n()) -> S
S300 <- filter(S, N >= 300)
set.seed(1234)
players <- sample(S300$player_name,
size = 100,
replace = FALSE)
# here is the dataset for only these players
scip_new <- filter(scip,
player_name %in% players)
# fit model to LS, LA
fit <- gam(H ~ s(launch_angle, launch_speed),
data = scip_new,
family = binomial)
# create a fancy graph of the predicted
# hit probabilities
df <- expand.grid(launch_angle = seq(-10, 50, length = 50),
launch_speed = seq(50, 115, length = 50))
df$Probability <- predict(fit, df, type = "response")
BR <- seq(0.025, 0.975, by = 0.05)
ggplot(df) + geom_contour_fill(aes(x = launch_angle,
y = launch_speed,
z = Probability),
breaks = BR,
size = 1.5) +
scale_fill_distiller(palette = "Spectral") +
ggtitle("Probability of Hit of Balls in Play") +
centertitle() +
increasefont() +
labs(fill = "Prob(Hit)") +
xlab("Launch Angle (degrees)") +
ylab("Launch Speed (mph)")
# fitting the Regression random effects model
scip_new$Player <- factor(scip_new$player_name)
fit2 <- gam(H ~ s(launch_angle, launch_speed) +
s(Player, bs = "re"),
data = scip_new,
family = binomial)
# collect the random effects estimates
RE2 <- coef(fit2)[31:130]
# fitting the Constant random effects model
scip_new$Player <- factor(scip_new$player_name)
fit3 <- gam(H ~ s(Player, bs = "re"),
data = scip_new,
family = binomial)
# collect the random effects estimates
RE3 <- coef(fit3)[-1]
Common <- coef(fit3)[1]
gam.vcomp(fit3)
# graph showing the shrinkage
# plot probs (reff model) against observed BABIP
scip_new %>%
group_by(Player) %>%
summarize(N = n(), HITS = sum(H)) -> S3
S3$RE3 <- RE3
S3$Prob <- exp(S3$RE + Common) /
(1 + exp(S3$RE + Common))
ggplot(S3, aes(HITS / N, Prob, label = Player)) +
geom_point() +
geom_abline(slope = 1, intercept = 0,
color = "red") +
xlim(.2, .425) +
geom_label(data = filter(S3,
Prob > .33 | Prob < .27 )) +
xlab("Observed BABIP") +
ylab("Probability Estimate") +
increasefont() +
centertitle() +
ggtitle("Estimates Using a Random Effects Model")
# scatterplot of two sets of random effects
S3$RE2 <- RE2
S3 %>% arrange(RE) %>% View()
ggplot(S3, aes(RE3, RE2, label = Player)) +
geom_point() +
increasefont() +
ggtitle("Scatterplot of Two Sets of Random Effects") +
centertitle() +
xlim(-.15, .15) +
ylim(-.15, .15) +
xlab("Constant Model") +
ylab("Regression Model") +
geom_label(data = filter(S3,
RE3 > .1,
RE2 < .05 )) +
geom_label(data = filter(S3,
RE3 < 0,
RE2 > .05 ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment