Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active May 7, 2022 01:53
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save bayesball/ab534e5adf2924737722d80f69b127b7 to your computer and use it in GitHub Desktop.
R code to measure the change in wOBA weights from one season to a second season
data_work <- function(){
require(readr)
require(dplyr)
require(lubridate)
sc_2021 <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/statcast2021.csv")
sc_2022 <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/statcast_2022.csv")
sc_old <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/SC_BB_mini.csv")
names(sc_old)[2] <- "Game_Date"
hits <- c("single", "double", "triple",
"home_run")
sc_2021 %>%
mutate(HR = ifelse(events == "home_run", 1, 0),
H = ifelse(events %in% hits, 1, 0)) %>%
select(game_year, Game_Date, launch_angle,
launch_speed, events, HR, H) -> sc_2021
sc_2022 %>%
mutate(HR = ifelse(events == "home_run", 1, 0),
H = ifelse(events %in% hits, 1, 0)) %>%
select(game_year, Game_Date, launch_angle,
launch_speed, events, HR, H) ->
sc_2022
sc <- rbind(sc_old, sc_2021, sc_2022)
sc %>%
mutate(Season = year(Game_Date))
}
offense_loss <- function(sc_ip,
season1, season2,
LA, LS){
require(mgcv)
require(dplyr)
require(ggplot2)
require(stringr)
sc_ip_1 <- filter(sc_ip,
game_year ==season1,
launch_angle >= LA[1],
launch_angle <= LA[2],
launch_speed >= LS[1],
launch_speed <= LS[2])
sc_ip_1 %>%
mutate(Type_Hit =
ifelse(events == "single", 2,
ifelse(events == "double", 3,
ifelse(events == "triple", 4,
ifelse(events == "home_run", 5, 1)
)))) -> sc_ip_1
newfit <- gam(Type_Hit ~ s(launch_angle,
launch_speed),
family = ocat(R = 5),
data = sc_ip_1)
sc_ip_2 <- filter(sc_ip,
game_year ==season2,
launch_angle >= LA[1],
launch_angle <= LA[2],
launch_speed >= LS[1],
launch_speed <= LS[2])
LA_breaks <- seq(LA[1], LA[2], by = LA[3])
LS_breaks <- seq(LS[1], LS[2], by = LS[3])
sc_ip_2 %>%
mutate(LA = cut(launch_angle,
LA_breaks),
LS = cut(launch_speed,
LS_breaks)) -> sc_ip_2
sc_ip_2 %>%
mutate(wOBA_wt = ifelse(events == "single", 0.9,
ifelse(events == "double", 1.25,
ifelse(events == "triple", 1.6,
ifelse(events == "home_run", 2, 0)
)))) ->
sc_ip_2
probs <- predict(newfit, sc_ip_2,
type = "response")
sc_ip_2$e_woba <- 0.9 * probs[, 2] +
1.25 * probs[, 3] +
1.6 * probs[, 4] +
2 * probs[, 5]
sc_ip_2 %>%
group_by(LA, LS) %>%
summarize(IP = n(),
wOBA = sum(wOBA_wt),
E_wOBA = sum(e_woba),
Change = wOBA - E_wOBA,
Z = Change / sqrt(E_wOBA),
.groups = "drop") -> S
S %>%
filter(is.na(LA) == FALSE) %>%
filter(is.na(LS) == FALSE) -> S
convert_string <- function(y){
y1 <- gsub("[,(]", " ", y)
y2 <- gsub("[][]", "", y1)
y3 <- gsub("^ ", "", y2)
mean(as.numeric(str_split(y3, " ")[[1]]))
}
S$la <- sapply(S$LA, convert_string)
S$ls <- sapply(S$LS, convert_string)
the_plot <- ggplot(S,
aes(la, ls, label = round(Z, 2))) +
geom_label(size = 6,
aes(fill = Z < -3),
color = "white") +
geom_vline(xintercept = LA_breaks,
color = "blue") +
geom_hline(yintercept = LS_breaks,
color = "blue") +
xlim(LA[1], LA[2]) +
ylim(LS[1], LS[2]) +
scale_fill_manual(values =
c("dodgerblue",
"red")) +
theme(text = element_text(size = 18)) +
theme(plot.title = element_text(colour = "red",
size = 18,
hjust = 0.5, vjust = 0.8, angle = 0),
plot.subtitle = element_text(colour = "blue",
size = 14,
hjust = 0.5, vjust = 0.8, angle = 0)) +
xlab("Launch Angle") +
ylab("Exit Velocity") +
labs(title = paste("Z Change in wOBA - ",
season1, "to", season2),
subtitle = paste("(games through ",
max(sc_ip_2$Game_Date),
")", sep = ""))
list(S = S, the_plot = the_plot)
}
source("data_work.R")
source("offense_loss.R")
sc_ip <- data_work()
LA <- c(0, 50, 10)
LS <- c(90, 115, 5)
season1 <- 2021
season2 <- 2022
out <- offense_loss(sc_ip,
season1, season2,
LA, LS)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment