Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created July 5, 2017 00:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/92ed3f3d3edf41c58652ca96b006fa9a to your computer and use it in GitHub Desktop.
Save bayesball/92ed3f3d3edf41c58652ca96b006fa9a to your computer and use it in GitHub Desktop.
Prediction of 2nd Half Team Records
# Example: produce the graph for the 2015 season.
# This assumes the Retrosheet game log files (gl<season>.txt) and the
# column-name file game_log_header.csv are both in the folder
# ~/Google Drive/gamelogs/gamelogs/
# output <- all_work(2015)
# output$p
#' Compare first-half and second-half team records for one season
#'
#' Reads the game log file `gl<season>.txt` and the column-name file
#' `game_log_header.csv` from `data_dir`, computes each team's wins and
#' losses before and after the early-July split, and plots the change in
#' (W - L) against the first-half (W - L).
#'
#' @param season Season year; used to build the game log file name and
#'   the split dates (e.g. 2015).
#' @param data_dir Folder containing `gl<season>.txt` and
#'   `game_log_header.csv`. Defaults to the folder the original script
#'   used.
#' @return A list with elements `p` (the ggplot object) and `slope` (the
#'   named coefficient of the through-the-origin regression of
#'   `WL_diff_2 - WL_diff_1` on `WL_diff_1`).
all_work <- function(season,
                     data_dir = "~/Google Drive/gamelogs/gamelogs") {
  # library() stops with an error when a package is missing, whereas
  # require() only returns FALSE and lets the failure surface later.
  library(readr)
  library(lubridate)
  library(dplyr)
  library(ggplot2)
  library(ggrepel)

  title_theme <- theme(
    plot.title = element_text(
      colour = "blue",
      size = 18,
      hjust = 0.5,
      vjust = 0.8,
      angle = 0
    )
  )

  # The game log file has no header row; column names come from the
  # separate header file.
  gldata <- read_csv(
    file.path(data_dir, paste0("gl", season, ".txt")),
    col_names = FALSE
  )
  headers <- read_csv(file.path(data_dir, "game_log_header.csv"))
  names(gldata) <- names(headers)
  gldata$DATE <- ymd(gldata$Date)

  # Win/loss totals per team for the games in `glyear`, plus the share of
  # a 30-team, 162-game season those games represent.
  get_standings <- function(glyear) {
    # Winner is 1 when the home team outscored the visitor, 0 otherwise
    # (so a game with equal scores is credited to the visiting team).
    # NOTE(review): `HomeRunsScore` has no trailing "d"; it must match the
    # column name in game_log_header.csv -- confirm against that file.
    glyear <- mutate(
      glyear,
      Winner = ifelse(VisitorRunsScored < HomeRunsScore, 1, 0)
    )
    home <- summarize(
      group_by(glyear, HomeTeam),
      W = sum(Winner),
      L = sum(1 - Winner)
    )
    away <- summarize(
      group_by(glyear, VisitingTeam),
      W = sum(1 - Winner),
      L = sum(Winner)
    )
    # inner_join keeps only teams with both home and away games here.
    combined <- inner_join(
      home, away,
      by = c("HomeTeam" = "VisitingTeam")
    )
    names(combined)[1] <- "Team"
    combined <- mutate(combined, W = W.x + W.y, L = L.x + L.y)
    standings <- select(combined, Team, W, L)

    # Every game contributes one W and one L, so this counts team-games.
    team_games <- sum(standings$W + standings$L)
    team_games_season <- 162 * 30
    list(
      Standings = standings,
      Percentage = 100 * (team_games / team_games_season)
    )
  }

  # First half: strictly before July 2. Second half: strictly after
  # July 3. Games played on July 2 and July 3 fall in neither half.
  # NOTE(review): confirm that this two-day gap is intended.
  first_half_before <- paste0(season, "-07-02")
  second_half_after <- paste0(season, "-07-03")
  ff1 <- get_standings(filter(gldata, DATE < first_half_before))
  ff2 <- get_standings(filter(gldata, DATE > second_half_after))

  ff <- inner_join(ff1$Standings, ff2$Standings, by = "Team")
  ff <- mutate(
    ff,
    WL_diff_1 = W.x - L.x,
    WL_diff_2 = W.y - L.y
  )

  # Fit before building the plot so the title reports this season's
  # slope. The original read `output$slope` from a global `output`, which
  # fails on a first call and otherwise shows a previous run's slope.
  fit <- lm(I(WL_diff_2 - WL_diff_1) ~ 0 + WL_diff_1, data = ff)
  slope <- coef(fit)

  # The reported slope is from the no-intercept fit above; geom_smooth()
  # is called without a formula, so the drawn line is fitted separately.
  p <- ggplot(
    ff,
    aes(WL_diff_1, WL_diff_2 - WL_diff_1, label = Team)
  ) +
    geom_text_repel() +
    geom_smooth(method = "lm", se = FALSE) +
    geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
    geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
    xlab("W - L (First Half)") +
    ylab("W - L (Second) Minus W - L (First)") +
    ggtitle(paste(season, "Season, Slope = ", round(slope, 2))) +
    title_theme

  list(p = p, slope = slope)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment