Last active
August 29, 2015 14:24
-
-
Save bayesball/6708ad7752cf593b6824 to your computer and use it in GitHub Desktop.
R code to look at attendance drops for each team
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load relevant packages | |
library(dplyr) | |
library(ggplot2) | |
# Download and parse the Retrosheet game log for one season.
#
# Arguments:
#   season  - season year used to build the archive name
#             (e.g. 2014 -> "gl2014.zip").
#   headers - data frame whose column names are applied to the game log;
#             it must have exactly one column per game log field.
#
# Returns a data frame with one row per line of the game log file and
# character fields kept as strings.
#
# All intermediate files live in a private temporary directory that is
# removed when the function exits, normally or through an error, so the
# working directory is never written to and nothing is left behind when
# the download or the parse fails.
load.gamelog <- function(season, headers) {
  archive_name <- paste0("gl", season, ".zip")

  work_dir <- tempfile("gamelog_")
  dir.create(work_dir)
  on.exit(unlink(work_dir, recursive = TRUE), add = TRUE)

  zip_path <- file.path(work_dir, archive_name)
  # mode = "wb": the archive is binary and must be written untranslated
  status <- download.file(
    url = paste0("http://www.retrosheet.org/gamelogs/", archive_name),
    destfile = zip_path,
    mode = "wb"
  )
  if (status != 0) {
    stop("download of ", archive_name, " failed with status ", status,
         call. = FALSE)
  }

  # unzip() returns the paths it actually wrote, so the member is located
  # by a case-insensitive name match instead of a guessed file name
  extracted <- unzip(zip_path, exdir = work_dir)
  wanted <- tolower(paste0("gl", season, ".txt"))
  log_path <- extracted[tolower(basename(extracted)) == wanted]
  if (length(log_path) != 1L) {
    stop("could not find ", wanted, " inside ", archive_name, call. = FALSE)
  }

  gamelog <- read.table(log_path, sep = ",", stringsAsFactors = FALSE)

  # fail loudly rather than produce missing or misassigned column names
  if (ncol(gamelog) != length(names(headers))) {
    stop("game log has ", ncol(gamelog), " columns but headers supplies ",
         length(names(headers)), " names", call. = FALSE)
  }
  names(gamelog) <- names(headers)
  gamelog
}
# Season to analyse.  The game log that is downloaded and the date that
# separates the two halves are both derived from this single value.
season <- 2014
# Dates are compared as yyyymmdd numbers; games before July 1 of the
# season belong to the first half.
half_cutoff <- season * 10000 + 701

# the header file is loaded from my website
headers <- read.csv("http://personal.bgsu.edu/~albert/baseball/game_log_header.csv")
d <- load.gamelog(season, headers)

# create a variable indicating 1st or 2nd half
d <- mutate(d, Half = ifelse(Date < half_cutoff, 1, 2))

# compute the home mean attendance (A) and number of home games (N) for
# each team in each half; na.rm = TRUE keeps one game without a recorded
# attendance from turning the whole team mean into NA
S <- d %>%
  group_by(HomeTeam, Half) %>%
  summarize(A = mean(Attendence, na.rm = TRUE), N = n()) %>%
  ungroup()

# merge the 1st half and 2nd half attendance means: one row per team,
# first-half columns suffixed .x and second-half columns suffixed .y
twohalves <- merge(
  filter(S, Half == 1),
  filter(S, Half == 2),
  by = "HomeTeam"
)
# Label each game with its winner and loser.  A game in which both teams
# scored the same number of runs has neither, so both labels are NA and
# table() below leaves it out of the counts instead of crediting the
# visiting team with a win and the home team with a loss.
d <- mutate(
  d,
  Winner = ifelse(
    HomeRunsScore == VisitorRunsScored, NA_character_,
    ifelse(HomeRunsScore > VisitorRunsScored, HomeTeam, VisitingTeam)
  ),
  Loser = ifelse(
    HomeRunsScore == VisitorRunsScored, NA_character_,
    ifelse(HomeRunsScore > VisitorRunsScored, VisitingTeam, HomeTeam)
  )
)

# tabulate the number of wins and losses for each team in each half
Wins <- with(d, table(Winner, Half))
Losses <- with(d, table(Loser, Half))

# Add the first half wins and losses to the data frame.  The counts are
# looked up by team code and by the half label "1" rather than copied by
# position, so they stay attached to the right team even when the rows of
# the tables and the rows of twohalves differ in order or in number.
team_codes <- as.character(twohalves$HomeTeam)
twohalves$W1 <- as.vector(Wins[match(team_codes, rownames(Wins)), "1"])
twohalves$L1 <- as.vector(Losses[match(team_codes, rownames(Losses)), "1"])
# a team that is absent from a table has no wins (or no losses) at all
twohalves$W1[is.na(twohalves$W1)] <- 0L
twohalves$L1[is.na(twohalves$L1)] <- 0L
# Two views of the change in home attendance.  Both put the percentage
# change from the first-half mean (A.x) to the second-half mean (A.y) on
# the vertical axis, draw each team as its text label, and mark "no
# change" with a horizontal line at zero.

# p1: change against the average of the two half-season means
p1 <- ggplot(
  twohalves,
  aes(
    x = (A.x + A.y) / 2,
    y = 100 * (A.y - A.x) / A.x,
    label = HomeTeam
  )
) +
  labs(
    x = "MEAN ATTENDANCE",
    y = "% CHANGE IN ATTENDANCE",
    title = "Mean-Difference Plot"
  ) +
  geom_text(colour = "blue") +
  geom_hline(yintercept = 0, colour = "red")

# p2: change against first-half wins minus first-half losses
p2 <- ggplot(
  twohalves,
  aes(
    x = W1 - L1,
    y = 100 * (A.y - A.x) / A.x,
    label = HomeTeam
  )
) +
  labs(
    x = "GAMES OVER .500 AT MIDSEASON",
    y = "% CHANGE IN ATTENDANCE",
    title = "Mid-Season Record and Change in Attendance"
  ) +
  geom_text(colour = "blue") +
  geom_hline(yintercept = 0, colour = "red")

# draw both plots, in order
invisible(lapply(list(p1, p2), print))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment