Created
August 19, 2022 20:04
-
-
Save bayesball/8bbd5b151238a3d93ba72fb2d3a80d81 to your computer and use it in GitHub Desktop.
R function to implement computations for sacrifice flies blog post
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sac_fly_work <- function(sc, season){ | |
# load required packages | |
require(dplyr) | |
require(ggplot2) | |
require(metR) | |
require(mgcv) | |
require(CalledStrike) | |
# define location, distance, and spray angle vars | |
sc %>% | |
mutate(location_x = 2.33 * (hc_x - 126), | |
location_y = 2.33 * (204.5 - hc_y), | |
distance = sqrt(location_x ^ 2 + | |
location_y ^ 2), | |
spray_angle = atan(location_x / location_y) * | |
180 / pi) -> | |
sc | |
# focus on batted balls hit with runner on 3rd | |
# consider balls in the air where launch angle > 10 | |
# outcomes are out, double-play or sac fly | |
sc %>% | |
filter(type == "X", | |
outs_when_up < 2, | |
events %in% | |
c("sac_fly", | |
"sac_fly_double_play", | |
"field_out", | |
"double_play"), | |
nchar(on_3b) > 0, | |
launch_angle > 10, | |
distance > 200) %>% | |
mutate(SAC_FLY = ifelse(events %in% | |
c("sac_fly", | |
"sac_fly_double_play"), | |
"YES", "NO"), | |
Success = ifelse(SAC_FLY == "YES", | |
1, 0)) -> sc_interest | |
# define out at home variable | |
sc_interest %>% | |
mutate(Out_at_Home = | |
ifelse(grepl("catcher", des) == TRUE & | |
grepl("out at home", des) == TRUE, "YES", | |
"NO")) -> sc_interest | |
# plot locations of out of home plays among those | |
# which didn't result in sacrifice flies | |
title0 <- paste(season, | |
"Locations of Unsuccesful SF\n with Out at Home Identified") | |
plot0 <- ggplot(filter(sc_interest, | |
SAC_FLY == "NO"), | |
aes(location_x, location_y, | |
color = Out_at_Home)) + | |
geom_point() + | |
increasefont() + | |
ggtitle(title0) + | |
centertitle() + | |
coord_fixed() + | |
xlim(-230, 230) + | |
ylim(120, 420) | |
# plot locations of fly balls, showing ones | |
# that give sac fries | |
title1 <- paste(season, | |
"Locations of Batted Balls in Air\n 0 or 1 Outs, Runner on 3rd") | |
plot1 <- ggplot(sc_interest, | |
aes(location_x, location_y, | |
color = SAC_FLY)) + | |
geom_point() + | |
increasefont() + | |
ggtitle(title1) + | |
centertitle() + | |
coord_fixed() + | |
xlim(-230, 230) + | |
ylim(120, 420) | |
# plot shows sac fly indicator as function of | |
# distance | |
title2 <- paste(season, | |
"SF as Function of Distance") | |
plot2 <- ggplot(sc_interest, | |
aes(distance, Success)) + | |
geom_jitter(width = 0, | |
height = 0.05) + | |
increasefont() + | |
ggtitle(title2) + | |
centertitle() + | |
xlab("Distance") | |
# remove values where distance | |
# is missing | |
sc_interest2 <- filter(sc_interest, | |
is.na(distance) == FALSE) | |
# fit logistic model | |
fit <- glm(Success ~ distance, | |
family = binomial, | |
data = sc_interest2) | |
# computed fitted probabilities | |
sc_interest2$Probability <- | |
predict(fit, type = "response") | |
# find distance where P(SF) = 0.5 | |
median_dist <- approx(sc_interest2$Probability, | |
sc_interest2$distance, .5)$y | |
# define batting team | |
sc_interest2 %>% | |
mutate(bat_team = | |
ifelse(inning_topbot == "Top", | |
away_team, home_team)) -> | |
sc_interest2 | |
# residuals for each team | |
sc_interest2 %>% | |
group_by(bat_team) %>% | |
summarize(Obs = sum(Success), | |
Exp = sum(Probability), | |
Z = (Obs - Exp) / sqrt(Exp)) -> S | |
# plot predicted probabilities of SF | |
title3 <- paste(season, | |
"P(SF) as Function of Distance") | |
plot3 <- ggplot(sc_interest2, | |
aes(distance, Probability)) + | |
geom_point() + | |
increasefont()+ | |
ggtitle(title3) + | |
centertitle() + | |
xlab("Distance") | |
# fit GAM with distance and spray angle as predictors | |
fit2 <- gam(Success ~ s(distance, spray_angle), | |
family = binomial, | |
data = sc_interest2) | |
# show contours where P(SF) = .1 (.2) .9 | |
title4 <- paste(season, | |
"Contour Graph of P(SF) as \n Function of Distance and Spray Angle") | |
# compute predicted probs on grid | |
grid <- expand.grid(spray_angle = seq(-45, 45, | |
length=50), | |
distance = seq(200, 400, | |
length=50)) | |
grid$lp <- predict(fit2, grid, type = "response") | |
# plot contour graph | |
plot4 <- ggplot(grid) + | |
geom_contour_fill(aes(x=spray_angle, | |
y=distance, | |
z=lp), | |
breaks=c(0, .1, .3, .5, .7, .9, 1), | |
size=1.5) + | |
geom_contour(aes(x=spray_angle, | |
y=distance, | |
z=lp), | |
breaks=0.5, | |
size=1.5, | |
color = "black") + | |
scale_fill_distiller(palette="Spectral") + | |
ylim(200, 300) + | |
increasefont() + | |
ggtitle(title4) + | |
centertitle() + | |
xlab("Spray Angle") + | |
ylab("Distance") | |
# plot of residuals | |
title5 <- paste(season, "Team SF Standardized Residuals") | |
plot5 <- ggplot(S, aes(Z, bat_team)) + | |
geom_point() + | |
geom_vline(xintercept = 0, | |
color = "red") + | |
increasefont() + | |
xlab("Standardized Residual") + | |
ylab("Batting Team") + | |
ggtitle(title5) + | |
centertitle() | |
# return output as a list | |
list(median_dist = median_dist, | |
plot0 = plot0, | |
plot1 = plot1, | |
plot2 = plot2, | |
plot3 = plot3, | |
plot4 = plot4, | |
plot5 = plot5) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment