Created
January 30, 2016 21:52
-
-
Save bayesball/68ca4e4a76cebba2925e to your computer and use it in GitHub Desktop.
# Exploring pitcher pace (the time between consecutive pitches) for games played during one week (September 5-11) of the 2015 season.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(pitchRx)
library(dplyr)
library(ggplot2)

# Download one week of game data. The result is used below through its
# `atbat` and `pitch` tables.
dat <- scrape(start = "2015-09-05", end = "2015-09-11")

# Attach plate-appearance information (who pitched, who batted, which game)
# to every pitch. `num` and `url` together are the join key.
pitches <- inner_join(
  select(dat$atbat,
         batter_name, pitcher_name, inning, gameday_link, num, url),
  select(dat$pitch,
         start_speed, pitch_type, sv_id, num, url),
  by = c("num", "url")
)

# Turn the `sv_id` stamp into a running clock in seconds.
# Characters 8-13 are read as HHMMSS. Characters 1-6 are assumed to be the
# matching YYMMDD date (NOTE(review): confirm against the feed). The day count
# is folded into `time` so that a plate appearance spanning midnight gives a
# small positive gap instead of one close to -86400 seconds. Only differences
# of `time` are used downstream, so the larger absolute values are harmless.
# Stamps that cannot be parsed yield NA.
pitches <- mutate(
  pitches,
  hours = as.numeric(substr(sv_id, 8, 9)),
  minutes = as.numeric(substr(sv_id, 10, 11)),
  seconds = as.numeric(substr(sv_id, 12, 13)),
  time = 86400 * as.numeric(as.Date(substr(sv_id, 1, 6), format = "%y%m%d")) +
    3600 * hours + 60 * minutes + seconds
)
# Collect the gaps (in seconds) between consecutive pitches thrown by one
# pitcher, computed separately inside every plate appearance.
#
# Reads the global data frame `pitches`, using its columns `pitcher_name`,
# `gameday_link`, `num` and `time`.
#
# pitcher: a single pitcher name, matched exactly against `pitcher_name`.
#
# Returns a data frame with the columns `Pitcher` and `Time`, one row per pair
# of consecutive pitches. A pitcher with no such pairs (unknown name, or only
# one-pitch plate appearances) gives a zero-row frame with the same two
# columns, so the results can always be stacked with rbind().
get_data <- function(pitcher) {
  # which() discards rows whose pitcher_name is NA.
  rows <- which(pitches$pitcher_name == pitcher)
  pdata <- pitches[rows, , drop = FALSE]

  # A plate appearance is identified by the game plus the at-bat number.
  # The explicit levels keep plate appearances in order of first appearance.
  pa_id <- paste(pdata$gameday_link, pdata$num)
  pa_id <- factor(pa_id, levels = unique(pa_id))

  # One pass over the data: split the clock values by plate appearance and
  # difference each piece. Plate appearances with a single pitch add nothing.
  gaps <- lapply(split(pdata$time, pa_id), diff)

  # unlist() gives NULL when there is nothing to flatten; as.numeric() turns
  # that into numeric(0) so the Time column always exists.
  time_to_pitch <- as.numeric(unlist(gaps, use.names = FALSE))

  data.frame(Pitcher = rep(pitcher, length(time_to_pitch)),
             Time = time_to_pitch)
}
# Pace data for eight selected pitchers, stacked into one data frame with the
# columns `Pitcher` and `Time`.
pitcher_names <- c("Justin Verlander",
                   "Joe Kelly",
                   "Jon Lester",
                   "Stephen Strasburg",
                   "Bartolo Colon",
                   "Wade Miley",
                   "Max Scherzer",
                   "Jake Arrieta")
pd <- do.call(rbind, lapply(pitcher_names, get_data))

# Median pace per pitcher. na.rm = TRUE keeps one unparseable timestamp from
# turning a pitcher's median into NA, which would silently remove that
# pitcher's green reference line from the plot.
S <- summarize(group_by(pd, Pitcher), M = median(Time, na.rm = TRUE))

# One histogram panel per pitcher, limited to gaps between 0 and 60 seconds.
# Red line: fixed reference at 20 seconds. Green line: the pitcher's median.
ggplot(pd, aes(Time)) +
  geom_histogram() +
  facet_wrap(~ Pitcher, ncol = 2) +
  geom_vline(xintercept = 20, color = "red") +
  xlim(0, 60) +
  geom_vline(data = S, aes(xintercept = M), color = "green") +
  xlab("Pace (Seconds)") +
  ggtitle("Pace Data for Eight 2015 Pitchers: Median (Green), 20 (Red)")
# Per plate appearance: median and MAD of the gaps between consecutive
# pitches, plus the number of pitches `n`. Plate appearances whose median is
# NA are dropped; that covers one-pitch plate appearances, which have no gaps.
PA <- summarize(group_by(pitches, pitcher_name, gameday_link, num),
                M = median(diff(time)),
                MAD = mad(diff(time)),
                n = n())
PA <- filter(PA, !is.na(M))

# Per pitcher: average of the plate-appearance medians weighted by pitch
# count, and the total number of pitches N.
Pitchers <- summarize(group_by(PA, pitcher_name),
                      M = sum(M * n) / sum(n),
                      N = sum(n))

# Keep only pitchers with at least 100 pitches.
Pitchers100 <- filter(Pitchers, N > 99)

# Dot plot of pace by pitcher, ordered by pace, with a red reference line at
# 20 seconds. Under coord_flip() the axis labels still follow the aesthetics:
# xlab() labels the pitcher axis (drawn vertically) and ylab() labels the pace
# axis (drawn horizontally).
ggplot(filter(Pitchers100, pitcher_name != "NA"),
       aes(reorder(pitcher_name, M), M)) +
  geom_point() +
  coord_flip() +
  xlab("Pitcher") +
  ylab("Median Time Between Pitches") +
  ggtitle("Median Pitcher Pace for Pitchers in a Week in 2015 Season") +
  geom_hline(yintercept = 20, color = "red")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment