Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created August 30, 2021 22:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/3fa2f4ceaa44c3adc19d42a915a84bff to your computer and use it in GitHub Desktop.
Save bayesball/3fa2f4ceaa44c3adc19d42a915a84bff to your computer and use it in GitHub Desktop.
R script to produce graphs for blog post "Home Run Update: August 31"
# required packages
library(dplyr)
library(ggplot2)
library(readr)
library(lubridate)
library(stringr)
# some helper functions
increasefont <- function (){
theme(text = element_text(size = 18))
}
centertitle <- function (){
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
}
# get statcast data - row bind older and 2021 datasets
sc_2021 <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/statcast2021.csv")
sc_old <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/SC_BB_mini.csv")
names(sc_old)[2] <- "Game_Date"
sc_2021 %>%
mutate(HR = ifelse(events == "home_run", 1, 0)) %>%
select(game_year, Game_Date, launch_angle,
launch_speed, HR) -> sc_2021
sc <- rbind(sc_old, sc_2021)
######## sample of batted balls graph
sc$hr <- ifelse(sc$HR == 1, "Y", "N")
ggplot() +
geom_point(data = filter(sample_n(sc, size = 10000),
launch_angle > 10,
launch_angle < 50,
launch_speed > 90,
launch_speed < 115),
aes(launch_angle, launch_speed,
color = hr)) +
scale_color_manual(values = c("tan", "red")) +
increasefont() +
ggtitle("Sample of Batted Balls") +
centertitle() +
geom_path(data = data.frame(
x = c(20, 40, 40, 20, 20),
y = c(95, 95, 115, 115, 95)
), aes(x, y),
size = 2) +
xlab("Launch Angle") +
ylab("Launch Speed")
# focus on la between 20 and 40 (4 groups)
# focus on ls between 95 and 115 (4 groups)
# group data by subgroup
sc %>%
mutate(Season = year(Game_Date)) -> sc
sc %>%
mutate(LA = cut(launch_angle,
seq(20, 40, by = 5)),
LS = cut(launch_speed,
seq(95, 115, by = 5))) -> sc
sc %>%
filter(is.na(LA) == FALSE,
is.na(LS) == FALSE) %>%
group_by(Season, LA, LS) %>%
summarize(N = n(),
HR = sum(HR),
.groups = "drop") -> S
sc %>%
group_by(Season) %>%
summarize(IP = n()) -> S1
inner_join(S, S1) -> S
convert_string <- function(y){
y1 <- gsub("[,(]", " ", y)
y2 <- gsub("[][]", "", y1)
y3 <- gsub("^ ", "", y2)
mean(as.numeric(str_split(y3, " ")[[1]]))
}
S$la <- sapply(S$LA, convert_string)
S$ls <- sapply(S$LS, convert_string)
###################### home run rates graph
S$season <- paste(S$Season, "Season")
ggplot(filter(S, Season %in%
c(2017, 2018, 2019, 2021)),
aes(la, ls,
label = round(100 * HR / N))) +
geom_label(size = 7, color = "white",
fill = "blue") +
geom_label(data = filter(S, Season == 2021,
ls %in% c(102.5, 97.5)),
size = 7, color = "black",
fill = "orange") +
facet_wrap(~ season) +
ylim(94, 116) +
xlim(19, 41) +
increasefont() +
ggtitle("Home Run Rates") +
centertitle() +
xlab("Launch Angle") +
ylab("Launch Velocity") +
theme(
strip.text = element_text(
size = 18, color = "red", face = "bold"
))
########## launch condition rates graph
ggplot(filter(S, Season %in%
c(2017, 2018, 2019, 2021)),
aes(la, ls,
label = round(100 * N / IP, 1))) +
geom_label(size = 7, color = "white",
fill = "blue") +
geom_label(data = filter(S, Season == 2021,
ls %in% c(102.5, 107.5)),
size = 7, color = "black",
fill = "orange") +
facet_wrap(~ season) +
ylim(94, 116) +
xlim(19, 41) +
increasefont() +
ggtitle("Launch Condition Rates") +
centertitle() +
xlab("Launch Angle") +
ylab("Launch Velocity") +
theme(
strip.text = element_text(
size = 18, color = "red", face = "bold"
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment