Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created December 15, 2018 13:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/ff7e1a8ba9b31a888fc7b36756154203 to your computer and use it in GitHub Desktop.
Save bayesball/ff7e1a8ba9b31a888fc7b36756154203 to your computer and use it in GitHub Desktop.
Does Plate Discipline Erode
# read in Statcast data for two seasons
library(tidyverse)
sc <- read_csv("../StatcastData/statcast2018new.csv")
sc17 <- read_csv("../StatcastData/statcast2017.csv")
# erode function will complete individual regression estimates for all
# players who have seen 1000 called pitches
erode <- function(sc){
require(tidyverse)
require(lubridate)
require(broom)
sc %>% filter(description %in% c("ball", "blocked_ball",
"called_strike")) %>%
mutate(Months = (as.numeric(ymd(game_date) -
ymd(min(sc$game_date)))) / 30) %>%
select(description, player_name, Months) -> sc_called
sc_called %>% group_by(player_name) %>%
summarize(Called = n()) %>%
inner_join(sc_called) %>%
filter(Called >= 1000) -> sc_called_1000
regressions <- sc_called_1000 %>%
split(pull(., player_name)) %>%
map(~ glm(description == "ball" ~ Months,
data = .,
family = binomial)) %>%
map_df(tidy, .id = "Name") %>%
as_tibble()
regressions %>% select(Name, term, estimate) %>%
spread(term, estimate) -> reg
names(reg)[2] <- "Intercept"
return(reg)
}
# get estimates for the two seasons
erode(sc) -> reg_18
erode(sc17) -> reg_17
# graph of estimates using 2018 data
library(ggrepel)
library(ggplot2)
ggplot(reg_18, aes(Intercept, Months, label = Name)) +
geom_point() +
geom_label_repel(data = filter(reg_18, Intercept > 1.0 |
Intercept < 0.15 |
Months > 0.10)) +
geom_hline(yintercept = 0, color = "red") +
ggtitle("Intercept and Slope Estimates for the Individual Logistic Regressions")
# merge the estimates for the two seasons
# construct scatterplots of the estimates
inner_join(reg_17, reg_18, by = "Name") -> two_years
p1 <- ggplot(two_years, aes(Intercept.x, Intercept.y)) +
geom_point() +
xlab("2017 Intercept Estimate") +
ylab("2018 Intercept Estimate") +
ggtitle("Scatterplot of Intercept Estimates: Correlation = 0.513") +
theme(plot.title = element_text(colour = "blue",
size = 18, hjust = 0.5))
p2 <- ggplot(two_years, aes(Months.x, Months.y)) +
geom_point() +
xlab("2017 Slope Estimate") +
ylab("2018 Slope Estimate") +
ggtitle("Scatterplot of Slope Estimates: Correlation = 0.056") +
theme(plot.title = element_text(colour = "blue",
size = 18, hjust = 0.5))
cor(select(two_years, Intercept.x, Intercept.y,
Months.x, Months.y))
library(gridExtra)
grid.arrange(p1, p2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment