Created
December 15, 2018 13:29
-
-
Save bayesball/ff7e1a8ba9b31a888fc7b36756154203 to your computer and use it in GitHub Desktop.
Does Plate Discipline Erode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# read in Statcast data for two seasons | |
library(tidyverse) | |
sc <- read_csv("../StatcastData/statcast2018new.csv") | |
sc17 <- read_csv("../StatcastData/statcast2017.csv") | |
# erode function will complete individual regression estimates for all | |
# players who have seen 1000 called pitches | |
erode <- function(sc){ | |
require(tidyverse) | |
require(lubridate) | |
require(broom) | |
sc %>% filter(description %in% c("ball", "blocked_ball", | |
"called_strike")) %>% | |
mutate(Months = (as.numeric(ymd(game_date) - | |
ymd(min(sc$game_date)))) / 30) %>% | |
select(description, player_name, Months) -> sc_called | |
sc_called %>% group_by(player_name) %>% | |
summarize(Called = n()) %>% | |
inner_join(sc_called) %>% | |
filter(Called >= 1000) -> sc_called_1000 | |
regressions <- sc_called_1000 %>% | |
split(pull(., player_name)) %>% | |
map(~ glm(description == "ball" ~ Months, | |
data = ., | |
family = binomial)) %>% | |
map_df(tidy, .id = "Name") %>% | |
as_tibble() | |
regressions %>% select(Name, term, estimate) %>% | |
spread(term, estimate) -> reg | |
names(reg)[2] <- "Intercept" | |
return(reg) | |
} | |
# get estimates for the two seasons | |
erode(sc) -> reg_18 | |
erode(sc17) -> reg_17 | |
# graph of estimates using 2018 data | |
library(ggrepel) | |
library(ggplot2) | |
ggplot(reg_18, aes(Intercept, Months, label = Name)) + | |
geom_point() + | |
geom_label_repel(data = filter(reg_18, Intercept > 1.0 | | |
Intercept < 0.15 | | |
Months > 0.10)) + | |
geom_hline(yintercept = 0, color = "red") + | |
ggtitle("Intercept and Slope Estimates for the Individual Logistic Regressions") | |
# merge the estimates for the two seasons | |
# construct scatterplots of the estimates | |
inner_join(reg_17, reg_18, by = "Name") -> two_years | |
p1 <- ggplot(two_years, aes(Intercept.x, Intercept.y)) + | |
geom_point() + | |
xlab("2017 Intercept Estimate") + | |
ylab("2018 Intercept Estimate") + | |
ggtitle("Scatterplot of Intercept Estimates: Correlation = 0.513") + | |
theme(plot.title = element_text(colour = "blue", | |
size = 18, hjust = 0.5)) | |
p2 <- ggplot(two_years, aes(Months.x, Months.y)) + | |
geom_point() + | |
xlab("2017 Slope Estimate") + | |
ylab("2018 Slope Estimate") + | |
ggtitle("Scatterplot of Slope Estimates: Correlation = 0.056") + | |
theme(plot.title = element_text(colour = "blue", | |
size = 18, hjust = 0.5)) | |
cor(select(two_years, Intercept.x, Intercept.y, | |
Months.x, Months.y)) | |
library(gridExtra) | |
grid.arrange(p1, p2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment