Created
July 5, 2017 00:41
-
-
Save bayesball/92ed3f3d3edf41c58652ca96b006fa9a to your computer and use it in GitHub Desktop.
Prediction of 2nd Half Team Records
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Illustration: producing the graph for the 2015 season.
# Assumes the Retrosheet game log files are in the folder
#   ~/Google Drive/gamelogs/gamelogs/
# output <- all_work(2015)
# output$p
# Compare each team's first-half and second-half W - L for one season of
# Retrosheet game logs, and plot the second-half change against the first half.
#
# Arguments:
#   season   - season year (e.g. 2015); used to build the game log file name
#              "gl<season>.txt" and the July split dates.
#   data_dir - folder holding the game log files and game_log_header.csv.
#              Defaults to the folder this script has always read from.
#
# Returns a list with
#   p     - ggplot object: labelled scatterplot of
#           (second-half W - L) - (first-half W - L) versus first-half W - L,
#           with an lm smoother and dashed red reference lines at zero.
#   slope - coefficient of the no-intercept regression of that difference
#           on first-half W - L (the value shown in the plot title).
all_work <- function(season, data_dir = "~/Google Drive/gamelogs/gamelogs") {
  # library() stops with an error when a package is missing; require() would
  # only return FALSE and let the function fail later with an obscure message.
  library(readr)
  library(lubridate)
  library(dplyr)
  library(ggplot2)
  library(ggrepel)

  # Centered, blue, enlarged plot title.
  title_theme <- theme(
    plot.title = element_text(
      colour = "blue",
      size = 18,
      hjust = 0.5,
      vjust = 0.8,
      angle = 0
    )
  )

  # The game log file has no header row; column names come from a separate
  # header file in the same folder.
  gldata <- read_csv(file.path(data_dir, paste0("gl", season, ".txt")),
                     col_names = FALSE)
  headers <- read_csv(file.path(data_dir, "game_log_header.csv"))
  names(gldata) <- names(headers)
  gldata$DATE <- ymd(gldata$Date)

  # Tabulate wins and losses per team for the games in `glyear`.
  # Returns a list: Standings (Team, W, L) and Percentage, the share of
  # Total_season team-games that these games represent.
  get_standings <- function(glyear) {
    # Winner is 1 when the home team outscored the visitor, 0 otherwise.
    # Column names must match the header file (NOTE(review): "HomeRunsScore"
    # is presumably the spelling used there -- confirm before "correcting").
    glyear <- mutate(glyear,
                     Winner = ifelse(VisitorRunsScored < HomeRunsScore,
                                     1, 0))
    home_record <- summarize(group_by(glyear, HomeTeam),
                             W = sum(Winner), L = sum(1 - Winner))
    away_record <- summarize(group_by(glyear, VisitingTeam),
                             W = sum(1 - Winner), L = sum(Winner))
    combined <- inner_join(home_record, away_record,
                           by = c("HomeTeam" = "VisitingTeam"))
    names(combined)[1] <- "Team"
    # W.x / L.x are the home totals, W.y / L.y the road totals.
    standings <- select(mutate(combined,
                               W = W.x + W.y,
                               L = L.x + L.y),
                        Team, W, L)
    total_games <- sum(standings$W + standings$L)
    # Presumably 162 games for each of 30 teams.
    Total_season <- 162 * 30
    list(Standings = standings,
         Percentage = 100 * (total_games / Total_season))
  }

  # First half: games strictly before July 2. Second half: games strictly
  # after July 3.
  # NOTE(review): games dated July 2 and July 3 fall in neither half; this
  # matches the original cut-offs -- confirm whether that gap is intended.
  second_half_after <- as.Date(paste0(season, "-07-03"))
  first_half_before <- as.Date(paste0(season, "-07-02"))
  first_half <- get_standings(filter(gldata, DATE < first_half_before))
  second_half <- get_standings(filter(gldata, DATE > second_half_after))

  # After the join, the .x columns are first-half and .y are second-half.
  ff <- inner_join(first_half$Standings, second_half$Standings, by = "Team")
  ff <- mutate(ff,
               WL_diff_1 = W.x - L.x,
               WL_diff_2 = W.y - L.y)

  # Fit BEFORE building the plot: the title needs the slope. The previous
  # code read `output$slope` from the caller's workspace, which failed on the
  # first call and showed a stale value afterwards.
  fit <- lm(I(WL_diff_2 - WL_diff_1) ~ 0 + WL_diff_1,
            data = ff)
  slope <- coef(fit)

  # The smoother drawn by geom_smooth() is an ordinary lm fit (with
  # intercept); the slope in the title comes from the no-intercept fit above.
  p <- ggplot(ff,
              aes(WL_diff_1, WL_diff_2 - WL_diff_1, label = Team)) +
    geom_text_repel() +
    geom_smooth(method = "lm", se = FALSE) +
    geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
    geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
    xlab("W - L (First Half)") +
    ylab("W - L (Second) Minus W - L (First)") +
    ggtitle(paste(season, "Season, Slope = ",
                  round(slope, 2))) +
    title_theme

  list(p = p, slope = slope)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment