Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active August 29, 2015 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/6708ad7752cf593b6824 to your computer and use it in GitHub Desktop.
Save bayesball/6708ad7752cf593b6824 to your computer and use it in GitHub Desktop.
R code to look at attendance drops for each team
# load relevant packages
library(dplyr)
library(ggplot2)
# Download and read the Retrosheet game log for one season.
#
# Arguments:
#   season  - season (year) used to build the archive name "gl<season>.zip"
#   headers - object whose names() become the column names of the result
#
# Returns the game log as a data frame (strings are kept as character).
#
# The archive is downloaded and unpacked inside a private temporary
# directory that is removed when the function exits, even on error, so
# nothing is created or deleted in the working directory.
load.gamelog <- function(season, headers) {
  zip_name <- paste0("gl", season, ".zip")
  txt_name <- paste0("gl", season, ".txt")

  work_dir <- tempfile("gamelog")
  dir.create(work_dir)
  on.exit(unlink(work_dir, recursive = TRUE), add = TRUE)

  zip_path <- file.path(work_dir, zip_name)
  download.file(
    url = paste0("http://www.retrosheet.org/gamelogs/", zip_name),
    destfile = zip_path,
    mode = "wb"  # the archive is binary; never translate line endings
  )

  # unzip() returns the paths it extracted; pick the game log by a
  # case-insensitive name match so the file is found whatever case the
  # archive member uses.
  extracted <- unzip(zip_path, exdir = work_dir)
  txt_path <- extracted[tolower(basename(extracted)) == tolower(txt_name)]
  if (length(txt_path) != 1) {
    stop("could not find ", txt_name, " in ", zip_name, call. = FALSE)
  }

  gamelog <- read.table(txt_path, sep = ",", stringsAsFactors = FALSE)
  names(gamelog) <- names(headers)
  gamelog
}
# column names for the game log are read from a header file hosted on the
# author's website
headers <- read.csv(
  file = "http://personal.bgsu.edu/~albert/baseball/game_log_header.csv"
)
# fetch the 2014 game log, with columns named after the header file
d <- load.gamelog(season = 2014, headers = headers)
# flag every game as 1st half (Date before 20140701) or 2nd half
d <- d %>%
  mutate(Half = ifelse(Date >= 20140701, 2, 1))
# per home team and half: mean attendance (A) and number of games (N)
S <- d %>%
  group_by(HomeTeam, Half) %>%
  summarise(A = mean(Attendence), N = n())
# one row per team: 1st-half columns get suffix .x, 2nd-half columns .y
twohalves <- merge(
  x = filter(S, Half == 1),
  y = filter(S, Half == 2),
  by = "HomeTeam"
)
# label every game with its winning and losing team; the home team wins
# only when it scored strictly more runs, otherwise the visitor is taken
# as the winner
d <- mutate(d,
  Winner = ifelse(HomeRunsScore > VisitorRunsScored,
                  HomeTeam, VisitingTeam),
  Loser = ifelse(HomeRunsScore > VisitorRunsScored,
                 VisitingTeam, HomeTeam))
# tabulate the number of wins and losses for each team in each half
Wins <- with(d, table(Winner, Half))
Losses <- with(d, table(Loser, Half))
# add the first half wins and losses to the data frame
# The counts are looked up by team name and by the column labelled "1"
# (Half == 1) rather than by position, so each row of twohalves gets its
# own team's record regardless of row order; a team missing from either
# table raises a subscript error instead of being silently misaligned.
twohalves$W1 <- unname(Wins[as.character(twohalves$HomeTeam), "1"])
twohalves$L1 <- unname(Losses[as.character(twohalves$HomeTeam), "1"])
# plot 1: percent change in mean attendance (2nd half relative to 1st
# half) against the average of the two half-season means, one text label
# per team, with a red reference line at no change
p1 <- ggplot(
  data = twohalves,
  mapping = aes(
    x = (A.x + A.y) / 2,
    y = 100 * (A.y - A.x) / A.x,
    label = HomeTeam
  )
) +
  geom_text(colour = "blue") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(
    x = "MEAN ATTENDANCE",
    y = "% CHANGE IN ATTENDANCE",
    title = "Mean-Difference Plot"
  )
# plot 2: the same percent change against first-half wins minus losses
p2 <- ggplot(
  data = twohalves,
  mapping = aes(
    x = W1 - L1,
    y = 100 * (A.y - A.x) / A.x,
    label = HomeTeam
  )
) +
  geom_text(colour = "blue") +
  geom_hline(yintercept = 0, colour = "red") +
  labs(
    x = "GAMES OVER .500 AT MIDSEASON",
    y = "% CHANGE IN ATTENDANCE",
    title = "Mid-Season Record and Change in Attendance"
  )
# draw both plots
print(p1)
print(p2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment