Predicting probability of hit from three variables

Predicting probability of hit from three variables
 # see blog post at
# https://baseballwithr.wordpress.com/2018/08/21/r-code-for-probability-of-hit-given-three-variables/
#
# Fits a GAM for the probability that a ball in play is a hit, using a joint
# smooth of launch speed, launch angle, and batter-side-adjusted spray angle,
# then plots the fitted probabilities over a grid of those three variables.

# Packages ----
library(tidyverse)
library(mgcv)

# Helpers ----

# Inverse logit: maps a value on the logit scale to a probability.
# Defined before its first use (the prediction example below needs it), and
# implemented with stats::plogis(), which does not overflow to NaN for large
# x the way exp(x) / (1 + exp(x)) does.
invlogit <- function(x) {
  plogis(x)
}

# Data ----

# read in the 2017 statcast data
sc2017 <- read_csv("../StatcastData/statcast2017.csv")

# only look at balls in play (type == "X") and define:
#   hit         - 1 if the event is a single/double/triple/home run, else 0
#   spray_angle - spray angle in degrees, rounded to one decimal
#                 (corrected version of Petit's reexpression)
#   phi1        - spray angle with the sign flipped when stand == "L", so the
#                 angle is adjusted for the side of the batter
hit_events <- c("single", "double", "triple", "home_run")

sc2017_ip <- sc2017 %>%
  filter(type == "X") %>%
  mutate(
    hit = as.numeric(events %in% hit_events),
    spray_angle = round(
      atan((hc_x - 125.42) / (198.27 - hc_y)) * 180 / pi,
      1
    ),
    phi1 = ifelse(stand == "L", -spray_angle, spray_angle)
  )

# Model ----

# fit gam with three variables (one joint smooth, binomial response)
fit <- gam(
  hit ~ s(launch_speed, launch_angle, phi1),
  data = sc2017_ip,
  family = binomial
)
save(fit, file = "threevarfit.Rdata")

# Example prediction for one batted ball: predict() returns the value on the
# logit scale; invlogit() turns it into a probability.
# NOTE(review): the script previously hard-coded invlogit(1.550209),
# presumably the value printed by this predict() call; it is now computed.
example_ball <- data.frame(launch_speed = 90, launch_angle = 10, phi1 = 0)
example_logit <- predict(fit, newdata = example_ball)
example_logit
invlogit(example_logit)

################################################
# here is the plotting part

# centered title styling shared by the plot below
TH <- theme(
  plot.title = element_text(
    colour = "black",
    size = 14,
    hjust = 0.5,
    vjust = 0.8,
    angle = 0
  )
)

# set up a grid of values of phi, ls, and la
phi_v <- seq(-45, 45, length.out = 100)
ls_v <- seq(80, 100, by = 5)
la_v <- seq(-10, 30, by = 5)
df <- expand.grid(
  phi1 = phi_v,
  launch_speed = ls_v,
  launch_angle = la_v
)

# find the predicted probability and define descriptive labels
# for the values of launch speed and launch angle
df$Probability <- invlogit(predict(fit, newdata = df))
df$Launch_Speed <- factor(
  df$launch_speed,
  levels = ls_v,
  labels = paste(ls_v, "mph")
)
df$Launch_Angle <- factor(
  df$launch_angle,
  levels = la_v,
  labels = paste("Launch Angle =", la_v)
)

# here is the graph: one panel per launch angle, one line per launch speed
ggplot(
  df,
  aes(phi1, Probability, color = Launch_Speed, group = Launch_Speed)
) +
  geom_line() +
  facet_wrap(~ Launch_Angle, labeller = label_value) +
  xlab("Adjusted Spray Angle (degrees)") +
  ylim(0, 1) +
  scale_colour_brewer(palette = "Reds") +
  theme_dark() +
  TH +
  ggtitle(
    paste(
      "Fitted Probability of Hit as Function of",
      "Spray Angle, Launch Speed, and Launch Angle"
    )
  )
