Created
May 7, 2021 12:50
-
-
Save bayesball/458c2a55b121e6201c7e603c822780f0 to your computer and use it in GitHub Desktop.
Functions that bin four-seam pitch measurements and graph rates over the zone.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bin_FF_locations <- function(sc, plateX, plateZ){ | |
# inputs: | |
# ------------------------------------ | |
# sc - statcast data with variables plate_x, | |
# plate_z, description, stand, type, events | |
# plateX = c(plateX_lo, plateX_hi, width_X) | |
# plateZ = c(plateZ_lo, plateZ_hi, width_Z) | |
require(dplyr) | |
# compute number of FF pitches thrown to each side | |
NT <- sc %>% | |
group_by(stand) %>% | |
summarize(N = n()) | |
swing_situations <- c("hit_into_play", | |
"foul", "swinging_strike", | |
"swinging_strike_blocked", | |
"missed_bunt", | |
"hit_into_play_no_out", "foul_bunt", | |
"foul_tip", "hit_into_play_score") | |
miss_situations <- c("swinging_strike", | |
"swinging_strike_blocked") | |
hits <- c("single", | |
"double", "triple", "home_run") | |
# define Swing, Miss, InPlay, Hit, and HR variables | |
sc %>% | |
mutate(Swing = ifelse(description %in% | |
swing_situations, 1, 0), | |
Miss = ifelse(description %in% | |
miss_situations, 1, 0), | |
InPlay = ifelse(type == "X", 1, 0), | |
Hit = ifelse(events %in% hits, 1, 0), | |
HR = ifelse(events == "home_run", 1, 0)) -> sc | |
plate_x_lo <- plateX[1] | |
plate_x_hi <- plateX[2] | |
width_x <- plateX[3] | |
plate_z_lo <- plateZ[1] | |
plate_z_hi <- plateZ[2] | |
width_z <- plateZ[3] | |
# NOTE: left endpoint is not included but | |
# right endpoint is included | |
# focus on pitches thrown within zone | |
sc %>% | |
filter(plate_x > plate_x_lo, | |
plate_x <= plate_x_hi, | |
plate_z > plate_z_lo, | |
plate_z <= plate_z_hi) -> sc2 | |
# set up breakpoints for bins | |
px_breaks <- seq(plate_x_lo, plate_x_hi, | |
by = width_x) | |
pz_breaks <- seq(plate_z_lo, plate_z_hi, | |
by = width_z) | |
# bin the values of plate_x and plate_z | |
sc2$px_c <- cut(sc2$plate_x, | |
breaks = px_breaks) | |
sc2$pz_c <- cut(sc2$plate_z, | |
breaks = pz_breaks) | |
sc2 %>% | |
filter(is.na(px_c) == FALSE, | |
is.na(pz_c) == FALSE) -> sc2 | |
# function to extract midpoint of a bin interval | |
myf <- function(y){ | |
mean(as.numeric(unlist(strsplit( | |
gsub("\\(|\\]", "", as.character(y)), | |
",")))) | |
} | |
# work on pitches thrown to right-handed batters | |
# compute counts in bins | |
sc2 %>% | |
filter(stand == "R") %>% | |
group_by(px_c, pz_c, | |
.drop = FALSE) %>% | |
summarize(NT = NT$N[NT$stand == "R"], | |
N = n(), | |
Swing = sum(Swing, n.rm = TRUE), | |
Miss = sum(Miss, na.rm = TRUE), | |
InPlay = sum(InPlay, na.rm = TRUE), | |
Hit = sum(Hit, na.rm = TRUE), | |
HR = sum(HR, na.rm = TRUE), | |
.groups = "drop") -> OUT_R | |
# add bin midpoints | |
OUT_R$PX <- sapply(OUT_R$px_c, myf) | |
OUT_R$PZ <- sapply(OUT_R$pz_c, myf) | |
OUT_R$stand <- "Right" | |
# now left-handed batters | |
sc2 %>% | |
filter(stand == "L") %>% | |
group_by(px_c, pz_c, | |
.drop = FALSE) %>% | |
summarize(NT = NT$N[NT$stand == "L"], | |
N = n(), | |
Swing = sum(Swing, n.rm = TRUE), | |
Miss = sum(Miss, na.rm = TRUE), | |
InPlay = sum(InPlay, na.rm = TRUE), | |
Hit = sum(Hit, na.rm = TRUE), | |
HR = sum(HR, na.rm = TRUE), | |
.groups = "drop") -> OUT_L | |
# add bin midpoints | |
OUT_L$PX <- sapply(OUT_L$px_c, myf) | |
OUT_L$PZ <- sapply(OUT_L$pz_c, myf) | |
OUT_L$stand <- "Left" | |
# combine left and right data frames | |
OUT <- rbind(OUT_L, OUT_R) | |
# compute five rates | |
OUT %>% | |
mutate(P1 = 100 * N / NT, # location rate | |
P2 = 100 * Swing / N, # swing rate | |
P3 = 100 * Miss / Swing, # miss rate | |
P4 = Hit / InPlay, # BABIP | |
P5 = 100 * HR / InPlay) -> # HR rate | |
OUT | |
OUT %>% select(stand, PX, PZ, NT, N, | |
Swing, Miss, | |
P1, P2, P3, P4, P5) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plot_rates <- function(out, | |
title = "", | |
subtitle = "", | |
digits = 0){ | |
# inputs: | |
# - data frame with four variables PX, PZ, stand, PCT | |
# - title and subtitle of graph | |
# - number of digits to right of decimal in output | |
out$PCT <- round(out$PCT, digits) | |
out$Sign <- ifelse(out$PCT > 0, "pos", "neg") | |
out$stand <- factor(out$stand, | |
levels = c("Right", "Left"), | |
labels = c("Right-Handed Hitters", | |
"Left-Handed Hitters")) | |
if(mean(out$PCT > 0) == 1){ | |
p1 <- ggplot() + | |
geom_label(data = filter(out, PX > 0), | |
aes(PX, PZ, label = PCT), | |
size = 5, | |
fill = "salmon", | |
color = "black") + | |
geom_label(data = filter(out, PX < 0), | |
aes(PX, PZ, label = PCT), | |
size = 5, | |
fill = "salmon", | |
color = "black")} else { | |
p1 <- ggplot() + | |
geom_label(data = filter(out, PX > 0), | |
aes(PX, PZ, label = PCT, | |
fill = Sign), | |
size = 5, | |
color = "black") + | |
geom_label(data = filter(out, PX < 0), | |
aes(PX, PZ, label = PCT, | |
fill = Sign), | |
size = 5, | |
color = "black") | |
} | |
p1 + | |
facet_wrap(~ stand, ncol = 2) + | |
coord_fixed() + | |
xlim(-1, 1) + ylim(1.5, 3.5) + | |
labs(title = title, subtitle = subtitle) + | |
theme(text=element_text(size=16)) + | |
theme(plot.title = element_text(colour = "blue", | |
size = 18, | |
hjust = 0.5, vjust = 0.8, angle = 0), | |
plot.subtitle = element_text(colour = "blue", | |
size = 18, | |
hjust = 0.5, vjust = 0.8, angle = 0), | |
strip.text = element_text( | |
size = 14, color = "blue") | |
) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment