Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active October 11, 2018 11:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/64639e77cb461e92d13089a29b0e8eef to your computer and use it in GitHub Desktop.
Save bayesball/64639e77cb461e92d13089a29b0e8eef to your computer and use it in GitHub Desktop.
Historical View of Three Hitting Rates
## Load Lahman Teams data frame
library(tidyverse)
## Season-level batting totals from the Lahman Teams table, 1913 onward.
## NOTE(review): the path below looks specific to the author's machine; point
## it at a local copy of Lahman's core/Teams.csv before running.
Teams <- read_csv("~/Dropbox/Google Drive/Lahman/core/Teams.csv")

## Sum at-bats, home runs, strikeouts and hits over all teams in each season.
S <- Teams %>%
  filter(yearID >= 1913) %>%
  group_by(yearID) %>%
  summarize(
    AB = sum(AB),
    HR = sum(HR),
    SO = sum(SO),
    H = sum(H)
  )

## Hand-entered totals for 2018. Append them only when the Teams file does
## not already contain a 2018 season; an unconditional append would make 2018
## appear twice in every plot and regression that follows.
if (!(2018 %in% S$yearID)) {
  S <- bind_rows(
    S,
    tibble(yearID = 2018, AB = 165432, HR = 5585, SO = 41207, H = 41019)
  )
}

## Three rates per season:
##   SO_Rate - strikeouts per at-bat
##   HR_Rate - home runs per at-bat that was not a strikeout
##   H_Rate  - non-HR hits per at-bat that was neither a HR nor a strikeout
S <- S %>%
  mutate(
    SO_Rate = SO / AB,
    HR_Rate = HR / (AB - SO),
    H_Rate = (H - HR) / (AB - HR - SO)
  )

## Long format: one row per (season, rate type), used by the plots below.
Sg <- S %>%
  select(yearID, SO_Rate, HR_Rate, H_Rate) %>%
  gather(Type, Rate, -yearID)
## Shared title styling reused by the plots: blue, size 18, centered.
TH <- theme(
  plot.title = element_text(colour = "blue", size = 18, hjust = 0.5)
)
## ------------------------------------------------------------------------
## Scatterplot of the raw rates by season, one color per rate type.
ggplot(data = Sg, mapping = aes(x = yearID, y = Rate, color = Type)) +
  geom_point(size = 1.5) +
  labs(title = "Historical View of Three Rates") +
  TH
## ------------------------------------------------------------------------
## Logit (log-odds) transform: returns log(x / (1 - x)).
## Works elementwise on a numeric vector x.
logit <- function(x) {
  odds <- x / (1 - x)
  log(odds)
}
## The same rates on the logit scale, with a loess smooth (span 0.2, no
## confidence band) drawn for each rate type.
ggplot(data = Sg, mapping = aes(x = yearID, y = logit(Rate), color = Type)) +
  geom_point(size = 1.5) +
  geom_smooth(method = "loess", span = 0.2, se = FALSE) +
  TH +
  labs(title = "Logits of Three Rates")
## ------------------------------------------------------------------------
## Logit rates restricted to 1975 onward, with a straight-line (lm) fit for
## each rate type.
ggplot(
  data = Sg %>% filter(yearID >= 1975),
  mapping = aes(x = yearID, y = logit(Rate), color = Type)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Logits of Three Rates Since 1975") +
  TH
## ------------------------------------------------------------------------
## Baseline: the 1975 value of each rate type.
S75 <- Sg %>%
  filter(yearID == 1975) %>%
  group_by(Type) %>%
  summarize(Rate75 = first(Rate))

## Attach that baseline (Rate75) to every season from 1975 onward.
Sgnew <- Sg %>%
  filter(yearID >= 1975) %>%
  inner_join(S75, by = "Type")
## ------------------------------------------------------------------------
## Change in logit rate relative to the 1975 baseline, one stacked panel per
## rate type, each with a linear trend line.
ggplot(
  data = Sgnew,
  mapping = aes(
    x = yearID,
    y = logit(Rate) - logit(Rate75),
    color = Type
  )
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  facet_wrap(~ Type, ncol = 1)
## ------------------------------------------------------------------------
## Same change-from-1975 display, limited to the home run and strikeout
## rates, with larger text throughout.
ggplot(
  data = Sgnew %>% filter(Type %in% c("HR_Rate", "SO_Rate")),
  mapping = aes(
    x = yearID,
    y = logit(Rate) - logit(Rate75),
    color = Type
  )
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  facet_wrap(~ Type, ncol = 1) +
  TH +
  theme(text = element_text(size = 18))
## ------------------------------------------------------------------------
library(broom)
## Yearly trend in logit(Rate) for each rate type, 1975 onward: fit one
## linear model per Type and keep only the row for the yearID coefficient.
Sgnew %>%
  mutate(logit_rate = logit(Rate)) %>%
  group_by(Type) %>%
  do(tidy(lm(logit_rate ~ yearID, data = .))) %>%
  filter(term == "yearID")
#-----------------------------------------------------
library(BApredict)
## Player-level hitting data from BApredict.
## NOTE(review): presumably the 2018 season, going by the plot title below;
## confirm against collect_hitting_data()'s defaults.
d <- collect_hitting_data()

## Same three rates as the season table, now per player. This reuses the
## name S, replacing the season-level table built earlier in the script.
S <- d %>%
  mutate(
    SO_Rate = SO / AB,
    HR_Rate = HR / (AB - SO),
    H_Rate = (H - HR) / (AB - HR - SO)
  )

## Strikeout rate against home run rate; players with SO_Rate above .35 or
## HR_Rate above .11 get a red label showing their playerID.
ggplot(data = S, mapping = aes(x = SO_Rate, y = HR_Rate, label = playerID)) +
  geom_point(color = "blue", size = 2) +
  geom_label(
    data = S %>% filter(SO_Rate > .35 | HR_Rate > .11),
    color = "red"
  ) +
  TH +
  labs(title = "SO and HR Rates for 2018 Players") +
  theme(text = element_text(size = 18))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment