Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created January 30, 2016 21:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/68ca4e4a76cebba2925e to your computer and use it in GitHub Desktop.
Save bayesball/68ca4e4a76cebba2925e to your computer and use it in GitHub Desktop.
Exploring the pitcher pace (time between pitches) for games played in a week of the 2015 season
library(pitchRx)
library(dplyr)
library(ggplot2)
# Pull one week of data (2015-09-05 through 2015-09-11) with pitchRx::scrape().
dat <- scrape(start = "2015-09-05", end = "2015-09-11")

# Attach at-bat level fields to every pitch. A pitch row is matched to its
# at-bat row when both the at-bat number (num) and the source url agree.
pitches <- dat$atbat %>%
  select(batter_name, pitcher_name, inning, gameday_link, num, url) %>%
  inner_join(
    select(dat$pitch, start_speed, pitch_type, sv_id, num, url),
    by = c("num", "url")
  )
# Derive clock fields from sv_id and a running `time` in seconds.
#
# The substr() offsets below imply sv_id looks like "yymmdd_HHMMSS"
# (characters 1-6 date, 8-13 time of day) -- NOTE(review): confirm against
# the pitchRx `pitch` table documentation.
#
# `time` includes the calendar day (days since 1970-01-01 * 86400) in addition
# to the time of day. Using seconds-since-midnight alone makes diff(time)
# jump by roughly -86400 for any plate appearance that straddles midnight;
# with the day folded in, differences between consecutive pitches stay
# correct across the date change. Differences within a single day are
# unchanged. Rows with a missing or unparseable sv_id get NA, as before.
pitches <- mutate(
  pitches,
  hours = as.numeric(substr(sv_id, 8, 9)),
  minutes = as.numeric(substr(sv_id, 10, 11)),
  seconds = as.numeric(substr(sv_id, 12, 13)),
  time = 86400 * as.numeric(as.Date(substr(sv_id, 1, 6), format = "%y%m%d")) +
    3600 * hours + 60 * minutes + seconds
)
# Collect the time gaps between consecutive pitches for one pitcher.
#
# Reads the script-level `pitches` data frame (columns used: pitcher_name,
# gameday_link, num, time).
#
# Args:
#   pitcher: name compared for exact equality with pitches$pitcher_name.
#
# Returns:
#   A data.frame with columns `Pitcher` (the name passed in) and `Time`
#   (difference in `time` between successive pitches of the same plate
#   appearance), one row per gap. Plate appearances are visited in order of
#   first appearance. A pitcher with no rows, or with only one-pitch plate
#   appearances, yields a zero-row data.frame that still has both columns, so
#   the result can always be rbind()-ed with the others.
get_data <- function(pitcher) {
  # which() discards rows whose pitcher_name is NA, as dplyr::filter() would.
  pdata <- pitches[which(pitches$pitcher_name == pitcher), , drop = FALSE]

  # A plate appearance is identified by game plus at-bat number. Fixing the
  # factor levels to first-appearance order keeps split() from sorting them.
  pa_id <- paste(pdata$gameday_link, pdata$num)
  pa_id <- factor(pa_id, levels = unique(pa_id))

  # lapply() always returns a list. sapply() would collapse the result to a
  # matrix whenever every plate appearance has the same number of pitches,
  # and that matrix would then become several `Time.*` columns.
  # Pitches are assumed to already be in chronological order within a plate
  # appearance -- NOTE(review): confirm for the upstream data.
  gaps <- lapply(split(pdata$time, pa_id), diff)
  time_to_pitch <- as.numeric(unlist(gaps, use.names = FALSE))

  # rep() keeps the two columns the same length even when there are no gaps.
  data.frame(
    Pitcher = rep(pitcher, length(time_to_pitch)),
    Time = time_to_pitch
  )
}
# Stack the pace data of the eight pitchers of interest, in the listed order.
pd <- do.call(
  rbind,
  lapply(
    c(
      "Justin Verlander",
      "Joe Kelly",
      "Jon Lester",
      "Stephen Strasburg",
      "Bartolo Colon",
      "Wade Miley",
      "Max Scherzer",
      "Jake Arrieta"
    ),
    get_data
  )
)
# Median pace per pitcher, used for the green reference line in each facet.
# na.rm = TRUE: a single NA gap (e.g. from a pitch with no usable timestamp)
# would otherwise turn that pitcher's median into NA and drop the line.
S <- summarize(group_by(pd, Pitcher), M = median(Time, na.rm = TRUE))
# One histogram of pace per pitcher (two facet columns), restricted to
# 0-60 seconds, with a red line at 20 seconds and a green line at the
# pitcher's own median taken from S.
ggplot(pd, aes(x = Time)) +
  geom_histogram() +
  facet_wrap(~ Pitcher, ncol = 2) +
  geom_vline(xintercept = 20, color = "red") +
  scale_x_continuous(limits = c(0, 60)) +
  geom_vline(data = S, aes(xintercept = M), color = "green") +
  labs(
    x = "Pace (Seconds)",
    title = "Pace Data for Eight 2015 Pitchers: Median (Green), 20 (Red)"
  )
# Per plate appearance (pitcher, game, at-bat number): the median and MAD of
# the gaps between consecutive pitches, plus the pitch count n. Plate
# appearances whose median is NA are discarded.
PA <- pitches %>%
  group_by(pitcher_name, gameday_link, num) %>%
  summarize(
    M = median(diff(time)),
    MAD = mad(diff(time)),
    n = n()
  ) %>%
  filter(!is.na(M))
# Per pitcher: the average of the plate-appearance medians weighted by the
# pitch count of each plate appearance (M), and the total pitch count (N).
Pitchers <- PA %>%
  group_by(pitcher_name) %>%
  summarize(
    M = sum(M * n) / sum(n),
    N = sum(n)
  )

# Keep only pitchers with at least 100 pitches in the retained data.
Pitchers100 <- filter(Pitchers, N >= 100)
# Dot plot of pace for every pitcher in Pitchers100, ordered by M, with a red
# reference line at 20 seconds. Rows whose name is the literal string "NA"
# are excluded (filter() also drops rows where the comparison itself is NA).
#
# Axis titles follow the aesthetics, not the screen position: the x aesthetic
# is the pitcher and the y aesthetic is the time, and coord_flip() only swaps
# where they are drawn. xlab() must therefore carry the pitcher title and
# ylab() the time title; the reverse assignment mislabels both axes.
ggplot(
  filter(Pitchers100, pitcher_name != "NA"),
  aes(reorder(pitcher_name, M), M)
) +
  geom_point() +
  coord_flip() +
  xlab("Pitcher") +
  ylab("Median Time Between Pitches") +
  ggtitle("Median Pitcher Pace for Pitchers in a Week in 2015 Season") +
  geom_hline(yintercept = 20, color = "red")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment