Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active August 19, 2021 20:29
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/7091a2c9d8d749c995144d9d95c8ab78 to your computer and use it in GitHub Desktop.
Save bayesball/7091a2c9d8d749c995144d9d95c8ab78 to your computer and use it in GitHub Desktop.
# Predicting the probability of a hit from three variables
# see blog post at
# https://baseballwithr.wordpress.com/2018/08/21/r-code-for-probability-of-hit-given-three-variables/
# load in tidyverse package
library(tidyverse)
# Load the 2017 Statcast data from its CSV file.
sc2017 <- read_csv("../StatcastData/statcast2017.csv")
# Keep only the rows with type "X" (balls in play) and add `hit`:
# 1 when the event is a single, double, triple or home run, 0 otherwise.
sc2017_ip <- sc2017 %>%
  filter(type == "X") %>%
  mutate(
    hit = as.numeric(
      events %in% c("single", "double", "triple", "home_run")
    )
  )
# Add two columns to the balls-in-play data:
#   spray_angle - spray angle (correct Petit's reexpression): the arctangent
#                 of the hit-coordinate ratio, converted from radians to
#                 degrees and rounded to one decimal place.
#   phi1        - spray angle adjusted for side of batter: the sign is
#                 flipped for rows where stand == "L".
sc2017_ip <- sc2017_ip %>%
  mutate(
    spray_angle = round(
      atan((hc_x - 125.42) / (198.27 - hc_y)) * 180 / pi,
      1
    ),
    phi1 = ifelse(stand == "L", -spray_angle, spray_angle)
  )
# Fit a GAM with one three-dimensional smooth of launch speed, launch angle
# and adjusted spray angle. With family = binomial (default logit link) the
# linear predictor is the log-odds of a hit.
library(mgcv)
fit <- gam(
  hit ~ s(launch_speed, launch_angle, phi1),
  data = sc2017_ip,
  family = binomial
)
# Save the fitted model object to disk.
save(fit, file = "threevarfit.Rdata")

# Example prediction at launch_speed = 90, launch_angle = 10, phi1 = 0.
example_bip <- data.frame(
  launch_speed = 90,
  launch_angle = 10,
  phi1 = 0
)
# By default predict() returns the value on the link (logit) scale.
predict(fit, example_bip)
# type = "response" returns the probability of a hit directly. This replaces
# a call to invlogit() on a hand-copied logit value: invlogit() is not yet
# defined at this point in the script, so that call failed when the file was
# run from top to bottom.
predict(fit, example_bip, type = "response")
################################################
# Plotting section ----
library(tidyverse)
# Theme tweak for the plot title: black, size 14, horizontally centred.
TH <- theme(
  plot.title = element_text(
    angle = 0,
    vjust = 0.8,
    hjust = 0.5,
    size = 14,
    colour = "black"
  )
)
# Build the prediction grid: every combination of
#   phi1          - 100 equally spaced values from -45 to 45
#   launch_speed  - 80 to 100 in steps of 5
#   launch_angle  - -10 to 30 in steps of 5
phi_v <- seq(from = -45, to = 45, length.out = 100)
ls_v <- seq(from = 80, to = 100, by = 5)
la_v <- seq(from = -10, to = 30, by = 5)
df <- expand.grid(
  phi1 = phi_v,
  launch_speed = ls_v,
  launch_angle = la_v
)
# Inverse logit: map a log-odds value x to a probability in [0, 1].
# Uses stats::plogis() rather than exp(x) / (1 + exp(x)), because the latter
# evaluates to Inf / Inf = NaN once exp(x) overflows (x above roughly 709).
invlogit <- function(x) {
  stats::plogis(x)
}
# Predicted hit probability for every grid row: predict() gives the fitted
# value on the logit scale and invlogit() converts it to a probability.
df[["Probability"]] <- predict(fit, newdata = df) %>% invlogit()
# Descriptive factor versions of launch speed and launch angle, used for the
# colour legend and the facet strips of the plot.
df[["Launch_Speed"]] <- factor(
  df[["launch_speed"]],
  levels = ls_v,
  labels = paste(ls_v, "mph")
)
df[["Launch_Angle"]] <- factor(
  df[["launch_angle"]],
  levels = la_v,
  labels = paste("Launch Angle =", la_v)
)
# Plot fitted hit probability against adjusted spray angle: one line per
# launch speed, one panel per launch angle.
# Note: the title string contains a literal line break, so its second line
# must stay unindented.
ggplot(
  data = df,
  mapping = aes(
    x = phi1,
    y = Probability,
    colour = Launch_Speed,
    group = Launch_Speed
  )
) +
  geom_line() +
  facet_wrap(~ Launch_Angle, labeller = label_value) +
  xlab("Adjusted Spray Angle (degrees)") +
  ylim(0, 1) +
  scale_colour_brewer(palette = "Reds") +
  theme_dark() +
  TH +
  ggtitle("Fitted Probability of Hit as Function
of Spray Angle, Launch Speed, and Launch Angle")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment