Last active
March 9, 2018 13:47
-
-
Save bayesball/af2afa83dddefe790866ba772afc3329 to your computer and use it in GitHub Desktop.
Looking for streaky patterns in Statcast launch speeds
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load in packages | |
library(tidyverse) | |
library(broom) | |
library(lubridate) | |
# read in statcast data | |
# see https://gist.github.com/bayesball/2d51673b7c10842baaee18b110b1a2e9 | |
# to see how one downloads this data using the baseballr package | |
statcast2017 <- read_csv("~/Dropbox/2016 WORK/BLOG Baseball R/OTHER/rand_effects_statcast/statcast2017.csv") | |
# look only on balls in play | |
statcast2017 %>% filter(type == "X") %>% | |
mutate(HIT = ifelse(events %in% | |
c("single", "double", "triple", "home_run"), | |
1, 0)) -> | |
sc2017_ip | |
# want to limit to batters who have at least | |
# N_min balls in play | |
sc2017_ip %>% group_by(batter) %>% count() -> | |
S | |
Nmin <- 400 | |
inner_join(sc2017_ip, S) %>% | |
filter(n >= Nmin) -> sc2017_ip_2 | |
# theme for plotting title in ggplot2 | |
TH <- theme( | |
plot.title = element_text( | |
colour = "blue", | |
size = 16, | |
hjust = 0.5, | |
vjust = 0.8, | |
angle = 0 | |
) | |
) | |
# only want data from 2017 regular season | |
# break into 13 two-week periods (add last Sunday to | |
# last biweek) | |
sc2017_ip_2 %>% | |
filter(game_date <= "2017-10-01") %>% | |
mutate(biweek = floor(week(game_date) / 2), | |
biweek = ifelse(biweek > 19, 19, biweek)) -> | |
sc_new | |
# confirm that we have chosen reasonable values of | |
# biweek | |
ggplot(sc_new, aes(biweek)) + | |
geom_bar() | |
###### some fitting | |
sc_new %>% group_by(biweek) %>% | |
summarize(mean_LS = mean(launch_speed), | |
mean_LA = mean(launch_angle)) -> | |
S2 | |
# how has average launch speed changed over the | |
# 2017 season? | |
p1 <- ggplot(S2, aes(biweek, mean_LS)) + | |
geom_point() + TH + geom_smooth() + | |
ggtitle("Average Launch Speed Through the Season") | |
# How about launch angle? | |
p2 <- ggplot(S2, aes(biweek, mean_LA)) + | |
geom_point() + TH + geom_smooth() + | |
ggtitle("Average Launch Angle Through the Season") | |
library(gridExtra) | |
grid.arrange(p1, p2) | |
################### | |
# fit many regressions (by player) | |
# predicting launch speed by biweek | |
regressions <- sc_new %>% | |
group_by(batter) %>% | |
do(fit = lm(launch_speed ~ factor(biweek), | |
data = .)) | |
regressions %>% glance(fit) -> Summary_regressions | |
# graph R^2 values for all players | |
Summary_regressions %>% | |
ggplot(aes(as.factor(batter), r.squared)) + | |
geom_point() + coord_flip() + | |
xlab("Player ID") + TH + | |
ggtitle("R Squared Values for All Batters") | |
# look at four players where the r.squared value | |
# exceeds 0.06 | |
streaky_regs <- filter(Summary_regressions, | |
r.squared > .06) | |
streaky_data <- filter(sc_new, | |
batter %in% streaky_regs$batter) | |
ggplot(streaky_data, | |
aes(as.factor(biweek), launch_speed)) + | |
geom_boxplot() + | |
stat_summary(fun.y = "median", color="red", | |
geom="point", size=3) + | |
facet_wrap(~ player_name) + | |
ylim(60, 110) + TH + xlab("Biweek") + | |
ggtitle("Four Streaky Batters with Respect to Launch Speed") | |
consistent_regs <- filter(Summary_regressions, | |
r.squared < .015) | |
consistent_data <- filter(sc_new, | |
batter %in% consistent_regs$batter) | |
ggplot(consistent_data, | |
aes(as.factor(biweek), launch_speed)) + | |
geom_boxplot() + | |
stat_summary(fun.y = "median", color="red", | |
geom="point", size=3) + | |
facet_wrap(~ player_name) + TH + ylab("Biweek") + | |
ggtitle("Four Consistent Hitters with Respect to Launch Speed") + | |
ylim(60, 110) | |
# relationship between AVG and average launch speed | |
streaky_data %>% | |
group_by(player_name, biweek) %>% | |
summarize(N = n(), | |
AVG = mean(HIT), | |
AVG_LS = mean(launch_speed), | |
AVG_LA = mean(launch_angle), | |
AVG_woba = mean(woba_value)) -> | |
S_new | |
ggplot(S_new, aes(AVG_LS, AVG)) + | |
geom_point() + | |
geom_smooth(method = "lm", se = FALSE) + | |
facet_wrap(~ player_name) + | |
TH + | |
ggtitle("Relationship Between Average Launch Speed | |
and AVG for Four Streaky Hitters") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment