Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active March 9, 2018 13:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/af2afa83dddefe790866ba772afc3329 to your computer and use it in GitHub Desktop.
Save bayesball/af2afa83dddefe790866ba772afc3329 to your computer and use it in GitHub Desktop.
Looking for streaky patterns in Statcast launch speeds
# load in packages
library(tidyverse)
library(broom)
library(lubridate)
# read in statcast data
# see https://gist.github.com/bayesball/2d51673b7c10842baaee18b110b1a2e9
# to see how one downloads this data using the baseballr package
statcast2017 <- read_csv("~/Dropbox/2016 WORK/BLOG Baseball R/OTHER/rand_effects_statcast/statcast2017.csv")
# look only on balls in play
statcast2017 %>% filter(type == "X") %>%
mutate(HIT = ifelse(events %in%
c("single", "double", "triple", "home_run"),
1, 0)) ->
sc2017_ip
# want to limit to batters who have at least
# N_min balls in play
sc2017_ip %>% group_by(batter) %>% count() ->
S
Nmin <- 400
inner_join(sc2017_ip, S) %>%
filter(n >= Nmin) -> sc2017_ip_2
# theme for plotting title in ggplot2
TH <- theme(
plot.title = element_text(
colour = "blue",
size = 16,
hjust = 0.5,
vjust = 0.8,
angle = 0
)
)
# only want data from 2017 regular season
# break into 13 two-week periods (add last Sunday to
# last biweek)
sc2017_ip_2 %>%
filter(game_date <= "2017-10-01") %>%
mutate(biweek = floor(week(game_date) / 2),
biweek = ifelse(biweek > 19, 19, biweek)) ->
sc_new
# confirm that we have chosen reasonable values of
# biweek
ggplot(sc_new, aes(biweek)) +
geom_bar()
###### some fitting
sc_new %>% group_by(biweek) %>%
summarize(mean_LS = mean(launch_speed),
mean_LA = mean(launch_angle)) ->
S2
# how has average launch speed changed over the
# 2017 season?
p1 <- ggplot(S2, aes(biweek, mean_LS)) +
geom_point() + TH + geom_smooth() +
ggtitle("Average Launch Speed Through the Season")
# How about launch angle?
p2 <- ggplot(S2, aes(biweek, mean_LA)) +
geom_point() + TH + geom_smooth() +
ggtitle("Average Launch Angle Through the Season")
library(gridExtra)
grid.arrange(p1, p2)
###################
# fit many regressions (by player)
# predicting launch speed by biweek
regressions <- sc_new %>%
group_by(batter) %>%
do(fit = lm(launch_speed ~ factor(biweek),
data = .))
regressions %>% glance(fit) -> Summary_regressions
# graph R^2 values for all players
Summary_regressions %>%
ggplot(aes(as.factor(batter), r.squared)) +
geom_point() + coord_flip() +
xlab("Player ID") + TH +
ggtitle("R Squared Values for All Batters")
# look at four players where the r.squared value
# exceeds 0.06
streaky_regs <- filter(Summary_regressions,
r.squared > .06)
streaky_data <- filter(sc_new,
batter %in% streaky_regs$batter)
ggplot(streaky_data,
aes(as.factor(biweek), launch_speed)) +
geom_boxplot() +
stat_summary(fun.y = "median", color="red",
geom="point", size=3) +
facet_wrap(~ player_name) +
ylim(60, 110) + TH + xlab("Biweek") +
ggtitle("Four Streaky Batters with Respect to Launch Speed")
consistent_regs <- filter(Summary_regressions,
r.squared < .015)
consistent_data <- filter(sc_new,
batter %in% consistent_regs$batter)
ggplot(consistent_data,
aes(as.factor(biweek), launch_speed)) +
geom_boxplot() +
stat_summary(fun.y = "median", color="red",
geom="point", size=3) +
facet_wrap(~ player_name) + TH + ylab("Biweek") +
ggtitle("Four Consistent Hitters with Respect to Launch Speed") +
ylim(60, 110)
# relationship between AVG and average launch speed
streaky_data %>%
group_by(player_name, biweek) %>%
summarize(N = n(),
AVG = mean(HIT),
AVG_LS = mean(launch_speed),
AVG_LA = mean(launch_angle),
AVG_woba = mean(woba_value)) ->
S_new
ggplot(S_new, aes(AVG_LS, AVG)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ player_name) +
TH +
ggtitle("Relationship Between Average Launch Speed
and AVG for Four Streaky Hitters")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment