Skip to content

Instantly share code, notes, and snippets.

# bayesball/threevarwork.R Last active Dec 4, 2018

Predicting probability of hit from three variables
 # see blog post at
# https://baseballwithr.wordpress.com/2018/08/21/r-code-for-probability-of-hit-given-three-variables/
#
# Fits a binomial GAM for the probability that a ball in play is a hit, as a
# smooth function of launch speed, launch angle, and batter-side-adjusted
# spray angle, then plots the fitted probabilities over a grid.

# Packages ----
library(tidyverse)
library(mgcv)

# Helpers ----

# Inverse logit: maps a value on the logit scale to a probability.
# Defined before its first use. plogis() is used instead of
# exp(x) / (1 + exp(x)) because the latter evaluates to Inf / Inf = NaN
# for large positive x.
invlogit <- function(x) {
  stats::plogis(x)
}

# Data ----

# read in the 2017 statcast data
sc2017 <- read_csv("../StatcastData/statcast2017.csv")

# Keep only rows with type == "X" (the balls in play) and derive:
#   hit         - 1 if the event is a single/double/triple/home run, else 0
#   spray_angle - angle in degrees computed from the hit coordinates
#                 (hc_x, hc_y), rounded to one decimal
#                 (correct Petit's reexpression)
#   phi1        - spray angle with the sign flipped when stand == "L", so the
#                 angle is adjusted for the side of the batter
sc2017_ip <- sc2017 %>%
  filter(type == "X") %>%
  mutate(
    hit = as.numeric(
      events %in% c("single", "double", "triple", "home_run")
    ),
    spray_angle = round(
      atan((hc_x - 125.42) / (198.27 - hc_y)) * 180 / pi,
      1
    ),
    phi1 = ifelse(stand == "L", -spray_angle, spray_angle)
  )

# Model ----

# fit gam with three variables (one joint smooth of all three predictors)
fit <- gam(
  hit ~ s(launch_speed, launch_angle, phi1),
  data = sc2017_ip,
  family = binomial
)
save(fit, file = "threevarfit.Rdata")

# Example prediction for a single batted ball. predict() returns the value on
# the link (logit) scale; invlogit() converts it to a probability.
# NOTE(review): the original script hard-coded invlogit(1.550209) here,
# presumably the logit from the author's run; it is now computed from the fit.
example_ball <- data.frame(launch_speed = 90, launch_angle = 10, phi1 = 0)
example_logit <- predict(fit, example_ball)
example_logit
invlogit(example_logit)

################################################
# here is the plotting part

# centered black plot title
TH <- theme(
  plot.title = element_text(
    colour = "black",
    size = 14,
    hjust = 0.5,
    vjust = 0.8,
    angle = 0
  )
)

# set up a grid of values of phi, ls, and la
phi_v <- seq(-45, 45, length.out = 100)
ls_v <- seq(80, 100, by = 5)
la_v <- seq(-10, 30, by = 5)
df <- expand.grid(
  phi1 = phi_v,
  launch_speed = ls_v,
  launch_angle = la_v
)

# find the predicted probability and define descriptive labels
# for the values of launch speed and launch angle
# (as.numeric() drops the array attributes that predict() attaches)
df$Probability <- invlogit(as.numeric(predict(fit, df)))
df$Launch_Speed <- factor(
  df$launch_speed,
  levels = ls_v,
  labels = paste(ls_v, "mph")
)
df$Launch_Angle <- factor(
  df$launch_angle,
  levels = la_v,
  labels = paste("Launch Angle =", la_v)
)

# here is the graph: one panel per launch angle, one line per launch speed
ggplot(
  df,
  aes(phi1, Probability, color = Launch_Speed, group = Launch_Speed)
) +
  geom_line() +
  facet_wrap(~ Launch_Angle, labeller = label_value) +
  xlab("Adjusted Spray Angle (degrees)") +
  ylim(0, 1) +
  scale_colour_brewer(palette = "Reds") +
  theme_dark() +
  TH +
  ggtitle(
    paste(
      "Fitted Probability of Hit as Function of",
      "Spray Angle, Launch Speed, and Launch Angle"
    )
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.