Skip to content

Instantly share code, notes, and snippets.

@bayesball bayesball/threevarwork.R
Last active Dec 4, 2018

Embed
What would you like to do?
Predicting probability of hit from three variables
# see blog post at
# https://baseballwithr.wordpress.com/2018/08/21/r-code-for-probability-of-hit-given-three-variables/
# load in tidyverse package
library(tidyverse)
# Load the 2017 Statcast file from disk.
sc2017 <- read_csv("../StatcastData/statcast2017.csv")

# Restrict to rows with type "X" (balls in play) and add `hit`, a 0/1
# indicator that is 1 when `events` is a single, double, triple or home run.
sc2017_ip <- sc2017 %>%
  filter(type == "X") %>%
  mutate(
    hit = ifelse(
      events %in% c("single", "double", "triple", "home_run"),
      1,
      0
    )
  )
# Add two columns to the balls-in-play data:
#   spray_angle - atan() of the ratio of the shifted hit coordinates,
#                 converted from radians to degrees and rounded to one
#                 decimal (the corrected form of Petit's re-expression).
#                 NOTE(review): 125.42 and 198.27 are presumably the
#                 home-plate position in hc_x / hc_y units - confirm.
#   phi1        - spray angle adjusted for the side of the batter: the
#                 sign is flipped when stand == "L".
sc2017_ip <- sc2017_ip %>%
  mutate(
    spray_angle = round(
      atan((hc_x - 125.42) / (198.27 - hc_y)) * 180 / pi,
      1
    ),
    phi1 = ifelse(stand == "L", -spray_angle, spray_angle)
  )
# Fit a binomial GAM for `hit` using one joint smooth of launch speed,
# launch angle and adjusted spray angle, then save the fitted object.
library(mgcv)
fit <- gam(
  hit ~ s(launch_speed, launch_angle, phi1),
  data = sc2017_ip,
  family = binomial
)
save(fit, file = "threevarfit.Rdata")

# Example prediction at 90 mph, 10 degrees launch angle, phi1 = 0.
# predict() is called without `type`, so the value is on the link (logit)
# scale; print it, then print the corresponding probability.
# plogis() is the base-R inverse logit. It is used here because the
# script's own invlogit() helper is only defined further down, so calling
# it at this point fails when the file is run top to bottom. The
# probability is computed from the prediction itself instead of the
# hard-coded 1.550209 the original passed, so it cannot go stale.
example_logit <- predict(
  fit,
  newdata = data.frame(
    launch_speed = 90,
    launch_angle = 10,
    phi1 = 0
  )
)
example_logit
plogis(example_logit)
################################################
# here is the plotting part
library(tidyverse)
# Title styling added to the plot below: black, size 14, not rotated,
# with hjust = 0.5 and vjust = 0.8.
TH <- theme(
  plot.title = element_text(
    size = 14,
    colour = "black",
    angle = 0,
    hjust = 0.5,
    vjust = 0.8
  )
)
# Build the prediction grid: every combination of
#   phi1         - 100 evenly spaced values from -45 to 45,
#   launch_speed - 80 to 100 in steps of 5,
#   launch_angle - -10 to 30 in steps of 5.
phi_v <- seq(from = -45, to = 45, length.out = 100)
ls_v <- seq(from = 80, to = 100, by = 5)
la_v <- seq(from = -10, to = 30, by = 5)
df <- expand.grid(
  phi1 = phi_v,
  launch_speed = ls_v,
  launch_angle = la_v
)
# Inverse logit: convert values on the log-odds scale to probabilities.
#
# x: numeric vector (or array) of log-odds.
# Returns probabilities in [0, 1] with the same length as x.
#
# Delegates to stats::plogis() instead of exp(x) / (1 + exp(x)): exp(x)
# overflows to Inf for large x (above roughly 709.78), which turns that
# ratio into NaN instead of 1.
invlogit <- function(x) {
  stats::plogis(x)
}
# Predict on the grid, convert the predictions to probabilities with
# invlogit(), and add factor versions of launch speed and launch angle
# whose labels are used in the plot legend and facet strips.
df[["Probability"]] <- invlogit(predict(fit, newdata = df))
df[["Launch_Speed"]] <- factor(
  df[["launch_speed"]],
  levels = ls_v,
  labels = paste(ls_v, "mph")
)
df[["Launch_Angle"]] <- factor(
  df[["launch_angle"]],
  levels = la_v,
  labels = paste("Launch Angle =", la_v)
)
# Plot fitted probability against adjusted spray angle: one line per
# launch speed (coloured from the "Reds" palette), one facet per launch
# angle, y axis fixed to [0, 1], dark theme plus the TH title styling.
ggplot(
  df,
  aes(
    x = phi1,
    y = Probability,
    colour = Launch_Speed,
    group = Launch_Speed
  )
) +
  geom_line() +
  facet_wrap(~ Launch_Angle, labeller = label_value) +
  ylim(0, 1) +
  xlab("Adjusted Spray Angle (degrees)") +
  ggtitle("Fitted Probability of Hit as Function
of Spray Angle, Launch Speed, and Launch Angle") +
  scale_colour_brewer(palette = "Reds") +
  theme_dark() +
  TH
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.