Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created August 19, 2022 20:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/8bbd5b151238a3d93ba72fb2d3a80d81 to your computer and use it in GitHub Desktop.
Save bayesball/8bbd5b151238a3d93ba72fb2d3a80d81 to your computer and use it in GitHub Desktop.
R function to implement computations for sacrifice flies blog post
sac_fly_work <- function(sc, season){
# load required packages
require(dplyr)
require(ggplot2)
require(metR)
require(mgcv)
require(CalledStrike)
# define location, distance, and spray angle vars
sc %>%
mutate(location_x = 2.33 * (hc_x - 126),
location_y = 2.33 * (204.5 - hc_y),
distance = sqrt(location_x ^ 2 +
location_y ^ 2),
spray_angle = atan(location_x / location_y) *
180 / pi) ->
sc
# focus on batted balls hit with runner on 3rd
# consider balls in the air where launch angle > 10
# outcomes are out, double-play or sac fly
sc %>%
filter(type == "X",
outs_when_up < 2,
events %in%
c("sac_fly",
"sac_fly_double_play",
"field_out",
"double_play"),
nchar(on_3b) > 0,
launch_angle > 10,
distance > 200) %>%
mutate(SAC_FLY = ifelse(events %in%
c("sac_fly",
"sac_fly_double_play"),
"YES", "NO"),
Success = ifelse(SAC_FLY == "YES",
1, 0)) -> sc_interest
# define out at home variable
sc_interest %>%
mutate(Out_at_Home =
ifelse(grepl("catcher", des) == TRUE &
grepl("out at home", des) == TRUE, "YES",
"NO")) -> sc_interest
# plot locations of out of home plays among those
# which didn't result in sacrifice flies
title0 <- paste(season,
"Locations of Unsuccesful SF\n with Out at Home Identified")
plot0 <- ggplot(filter(sc_interest,
SAC_FLY == "NO"),
aes(location_x, location_y,
color = Out_at_Home)) +
geom_point() +
increasefont() +
ggtitle(title0) +
centertitle() +
coord_fixed() +
xlim(-230, 230) +
ylim(120, 420)
# plot locations of fly balls, showing ones
# that give sac fries
title1 <- paste(season,
"Locations of Batted Balls in Air\n 0 or 1 Outs, Runner on 3rd")
plot1 <- ggplot(sc_interest,
aes(location_x, location_y,
color = SAC_FLY)) +
geom_point() +
increasefont() +
ggtitle(title1) +
centertitle() +
coord_fixed() +
xlim(-230, 230) +
ylim(120, 420)
# plot shows sac fly indicator as function of
# distance
title2 <- paste(season,
"SF as Function of Distance")
plot2 <- ggplot(sc_interest,
aes(distance, Success)) +
geom_jitter(width = 0,
height = 0.05) +
increasefont() +
ggtitle(title2) +
centertitle() +
xlab("Distance")
# remove values where distance
# is missing
sc_interest2 <- filter(sc_interest,
is.na(distance) == FALSE)
# fit logistic model
fit <- glm(Success ~ distance,
family = binomial,
data = sc_interest2)
# computed fitted probabilities
sc_interest2$Probability <-
predict(fit, type = "response")
# find distance where P(SF) = 0.5
median_dist <- approx(sc_interest2$Probability,
sc_interest2$distance, .5)$y
# define batting team
sc_interest2 %>%
mutate(bat_team =
ifelse(inning_topbot == "Top",
away_team, home_team)) ->
sc_interest2
# residuals for each team
sc_interest2 %>%
group_by(bat_team) %>%
summarize(Obs = sum(Success),
Exp = sum(Probability),
Z = (Obs - Exp) / sqrt(Exp)) -> S
# plot predicted probabilities of SF
title3 <- paste(season,
"P(SF) as Function of Distance")
plot3 <- ggplot(sc_interest2,
aes(distance, Probability)) +
geom_point() +
increasefont()+
ggtitle(title3) +
centertitle() +
xlab("Distance")
# fit GAM with distance and spray angle as predictors
fit2 <- gam(Success ~ s(distance, spray_angle),
family = binomial,
data = sc_interest2)
# show contours where P(SF) = .1 (.2) .9
title4 <- paste(season,
"Contour Graph of P(SF) as \n Function of Distance and Spray Angle")
# compute predicted probs on grid
grid <- expand.grid(spray_angle = seq(-45, 45,
length=50),
distance = seq(200, 400,
length=50))
grid$lp <- predict(fit2, grid, type = "response")
# plot contour graph
plot4 <- ggplot(grid) +
geom_contour_fill(aes(x=spray_angle,
y=distance,
z=lp),
breaks=c(0, .1, .3, .5, .7, .9, 1),
size=1.5) +
geom_contour(aes(x=spray_angle,
y=distance,
z=lp),
breaks=0.5,
size=1.5,
color = "black") +
scale_fill_distiller(palette="Spectral") +
ylim(200, 300) +
increasefont() +
ggtitle(title4) +
centertitle() +
xlab("Spray Angle") +
ylab("Distance")
# plot of residuals
title5 <- paste(season, "Team SF Standardized Residuals")
plot5 <- ggplot(S, aes(Z, bat_team)) +
geom_point() +
geom_vline(xintercept = 0,
color = "red") +
increasefont() +
xlab("Standardized Residual") +
ylab("Batting Team") +
ggtitle(title5) +
centertitle()
# return output as a list
list(median_dist = median_dist,
plot0 = plot0,
plot1 = plot1,
plot2 = plot2,
plot3 = plot3,
plot4 = plot4,
plot5 = plot5)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment