Created
November 20, 2017 11:59
-
-
Save bayesball/7cbd17b6a1a61e9cc96d453182022080 to your computer and use it in GitHub Desktop.
R code to fit generalized additive model to Statcast data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load in packages | |
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(mgcv) | |
##### read in a theme for the title of my plots | |
TH <- theme(plot.title = element_text(hjust = 0.5, size = 18)) | |
##### read in data | |
##### I use the scrape_statcast_savant_batter_all function in | |
##### the baseballr data to collect the data for the 2017 season | |
sc_all <- read_csv("statcast2017.csv") | |
sc_ip <- filter(sc_all, type == "X") | |
sc_ip %>% mutate(outcome = ifelse(events %in% | |
c("single", "double", "triple", "home_run"), 1, 0)) -> sc_ip | |
##### basic exploratory plot | |
ggplot(slice(sc_ip, 1:2000), | |
aes(launch_angle, launch_speed, color=outcome)) + | |
geom_point() + | |
xlim(-25, 50) + | |
ylim(40, 120) + | |
ggtitle("2000 Balls Put In Play") + TH | |
##### some modeling | |
##### work with a sample of 60,000 | |
sc_ip_sample <- slice(sc_ip, 1:60000) | |
sc_ip_test <- slice(sc_ip, 60001:129365) | |
##### fit gam model | |
fit <- gam(outcome ~ s(launch_angle, launch_speed), | |
family = binomial, | |
data = sc_ip_sample) | |
##### convert the predictions to the probability scale | |
invlogit <- function(x) exp(x) / (1 + exp(x)) | |
##### what is the probability of a hit when the | |
##### launch angle is 20 degrees and the exit velocitiy is 100? | |
invlogit(predict(fit, data.frame(launch_angle = 20, | |
launch_speed = 100))) | |
##### predict over grid | |
df_p <- expand.grid(launch_angle = seq(-20, 50, length=50), | |
launch_speed = seq(40, 120, length=50)) | |
df_p$lp <- predict(fit, df_p) | |
df_p$Probability <- exp(df_p$lp) / (1 + exp(df_p$lp)) | |
# change colors and use finer grid | |
ggplot(df_p, aes(x=launch_angle, y=launch_speed, | |
z=Probability)) + | |
stat_contour(geom="polygon", | |
breaks=seq(0, 1, length.out = 21), | |
size=1.5, | |
aes(fill=..level..)) + | |
scale_fill_gradientn(colours = heat.colors(10)) + | |
geom_vline(xintercept = 0, color="black") + | |
xlim(-25, 50) + | |
ylim(40, 120) + | |
ggtitle("Contour Plot of Probability of Hit") + TH | |
############### evaluate several prediction rules | |
sc_ip_test$lp <- predict(fit, sc_ip_test) | |
sc_ip_test$predict <- ifelse(sc_ip_test$lp > 0, 1, 0) | |
sc_ip_test %>% | |
summarize(Rate = mean(outcome == predict, na.rm=TRUE)) | |
# compare to other rules | |
sc_ip_test %>% summarize(mean(outcome, na.rm = TRUE)) | |
sc_ip_test$rule1 <- ifelse(sc_ip_test$launch_speed > 100, | |
1, 0) | |
sc_ip_test %>% | |
summarize(Rate1 = mean(outcome == rule1, na.rm=TRUE)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment