Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created May 7, 2021 12:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/458c2a55b121e6201c7e603c822780f0 to your computer and use it in GitHub Desktop.
Save bayesball/458c2a55b121e6201c7e603c822780f0 to your computer and use it in GitHub Desktop.
Functions that bin four-seam pitch measurements and graph rates over the zone.
bin_FF_locations <- function(sc, plateX, plateZ){
# inputs:
# ------------------------------------
# sc - statcast data with variables plate_x,
# plate_z, description, stand, type, events
# plateX = c(plateX_lo, plateX_hi, width_X)
# plateZ = c(plateZ_lo, plateZ_hi, width_Z)
require(dplyr)
# compute number of FF pitches thrown to each side
NT <- sc %>%
group_by(stand) %>%
summarize(N = n())
swing_situations <- c("hit_into_play",
"foul", "swinging_strike",
"swinging_strike_blocked",
"missed_bunt",
"hit_into_play_no_out", "foul_bunt",
"foul_tip", "hit_into_play_score")
miss_situations <- c("swinging_strike",
"swinging_strike_blocked")
hits <- c("single",
"double", "triple", "home_run")
# define Swing, Miss, InPlay, Hit, and HR variables
sc %>%
mutate(Swing = ifelse(description %in%
swing_situations, 1, 0),
Miss = ifelse(description %in%
miss_situations, 1, 0),
InPlay = ifelse(type == "X", 1, 0),
Hit = ifelse(events %in% hits, 1, 0),
HR = ifelse(events == "home_run", 1, 0)) -> sc
plate_x_lo <- plateX[1]
plate_x_hi <- plateX[2]
width_x <- plateX[3]
plate_z_lo <- plateZ[1]
plate_z_hi <- plateZ[2]
width_z <- plateZ[3]
# NOTE: left endpoint is not included but
# right endpoint is included
# focus on pitches thrown within zone
sc %>%
filter(plate_x > plate_x_lo,
plate_x <= plate_x_hi,
plate_z > plate_z_lo,
plate_z <= plate_z_hi) -> sc2
# set up breakpoints for bins
px_breaks <- seq(plate_x_lo, plate_x_hi,
by = width_x)
pz_breaks <- seq(plate_z_lo, plate_z_hi,
by = width_z)
# bin the values of plate_x and plate_z
sc2$px_c <- cut(sc2$plate_x,
breaks = px_breaks)
sc2$pz_c <- cut(sc2$plate_z,
breaks = pz_breaks)
sc2 %>%
filter(is.na(px_c) == FALSE,
is.na(pz_c) == FALSE) -> sc2
# function to extract midpoint of a bin interval
myf <- function(y){
mean(as.numeric(unlist(strsplit(
gsub("\\(|\\]", "", as.character(y)),
","))))
}
# work on pitches thrown to right-handed batters
# compute counts in bins
sc2 %>%
filter(stand == "R") %>%
group_by(px_c, pz_c,
.drop = FALSE) %>%
summarize(NT = NT$N[NT$stand == "R"],
N = n(),
Swing = sum(Swing, n.rm = TRUE),
Miss = sum(Miss, na.rm = TRUE),
InPlay = sum(InPlay, na.rm = TRUE),
Hit = sum(Hit, na.rm = TRUE),
HR = sum(HR, na.rm = TRUE),
.groups = "drop") -> OUT_R
# add bin midpoints
OUT_R$PX <- sapply(OUT_R$px_c, myf)
OUT_R$PZ <- sapply(OUT_R$pz_c, myf)
OUT_R$stand <- "Right"
# now left-handed batters
sc2 %>%
filter(stand == "L") %>%
group_by(px_c, pz_c,
.drop = FALSE) %>%
summarize(NT = NT$N[NT$stand == "L"],
N = n(),
Swing = sum(Swing, n.rm = TRUE),
Miss = sum(Miss, na.rm = TRUE),
InPlay = sum(InPlay, na.rm = TRUE),
Hit = sum(Hit, na.rm = TRUE),
HR = sum(HR, na.rm = TRUE),
.groups = "drop") -> OUT_L
# add bin midpoints
OUT_L$PX <- sapply(OUT_L$px_c, myf)
OUT_L$PZ <- sapply(OUT_L$pz_c, myf)
OUT_L$stand <- "Left"
# combine left and right data frames
OUT <- rbind(OUT_L, OUT_R)
# compute five rates
OUT %>%
mutate(P1 = 100 * N / NT, # location rate
P2 = 100 * Swing / N, # swing rate
P3 = 100 * Miss / Swing, # miss rate
P4 = Hit / InPlay, # BABIP
P5 = 100 * HR / InPlay) -> # HR rate
OUT
OUT %>% select(stand, PX, PZ, NT, N,
Swing, Miss,
P1, P2, P3, P4, P5)
}
plot_rates <- function(out,
title = "",
subtitle = "",
digits = 0){
# inputs:
# - data frame with four variables PX, PZ, stand, PCT
# - title and subtitle of graph
# - number of digits to right of decimal in output
out$PCT <- round(out$PCT, digits)
out$Sign <- ifelse(out$PCT > 0, "pos", "neg")
out$stand <- factor(out$stand,
levels = c("Right", "Left"),
labels = c("Right-Handed Hitters",
"Left-Handed Hitters"))
if(mean(out$PCT > 0) == 1){
p1 <- ggplot() +
geom_label(data = filter(out, PX > 0),
aes(PX, PZ, label = PCT),
size = 5,
fill = "salmon",
color = "black") +
geom_label(data = filter(out, PX < 0),
aes(PX, PZ, label = PCT),
size = 5,
fill = "salmon",
color = "black")} else {
p1 <- ggplot() +
geom_label(data = filter(out, PX > 0),
aes(PX, PZ, label = PCT,
fill = Sign),
size = 5,
color = "black") +
geom_label(data = filter(out, PX < 0),
aes(PX, PZ, label = PCT,
fill = Sign),
size = 5,
color = "black")
}
p1 +
facet_wrap(~ stand, ncol = 2) +
coord_fixed() +
xlim(-1, 1) + ylim(1.5, 3.5) +
labs(title = title, subtitle = subtitle) +
theme(text=element_text(size=16)) +
theme(plot.title = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
plot.subtitle = element_text(colour = "blue",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
strip.text = element_text(
size = 14, color = "blue")
)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment