Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/9d73c2c15f8c4f6d0d39 to your computer and use it in GitHub Desktop.
Save bayesball/9d73c2c15f8c4f6d0d39 to your computer and use it in GitHub Desktop.
Graphs of Cubs and White Sox Median Attendance for Recent Seasons
# Cubs / White Sox attendance study
# Assumes that the Retrosheet game log files
# gl1900.txt, gl1901.txt, ... and the column-name file
# game_log_header.csv are contained in the folder gamelogs.
# Only the files gl1954.txt through gl2013.txt are used in this code.
# Compute one team's median home attendance for each season.
#
# Arguments:
#   team    - team code compared against the HomeTeam column
#             (the script calls this with "CHN" and "CHA").
#   Seasons - non-empty vector of years; for each year the file
#             gamelogs/gl<year>.txt is read (no header row expected).
#
# Returns the result of dplyr::summarize(): one row per season with a
# numeric Season column and Median, the median of the Attendence column
# over that season's home games.  Games whose attendance is missing are
# ignored rather than turning the whole season's median into NA.
#
# Column names are taken from gamelogs/game_log_header.csv.
# NOTE(review): "Attendence" presumably mirrors the spelling used in that
# header file -- check the file before "correcting" it.
get.data <- function(team, Seasons){
  # library() stops with an error when dplyr is not installed; require()
  # would only return FALSE and let summarize() fail later on.
  library(dplyr)
  stopifnot(length(Seasons) > 0)

  # The header file is identical for every season, so read it only once.
  headers <- read.csv("gamelogs/game_log_header.csv")

  # Read each season and keep only this team's home games.  The pieces are
  # collected in a list and bound once, instead of re-copying a growing
  # data frame with rbind() on every iteration.
  home.games <- lapply(Seasons, function(year){
    filename <- paste0("gamelogs/gl", year, ".txt")
    d <- read.csv(filename, header=FALSE)
    names(d) <- names(headers)
    # which() drops rows where HomeTeam is NA, as subset() would.
    d[which(d$HomeTeam == team), , drop=FALSE]
  })
  data <- do.call(rbind, home.games)

  # Date is read as yyyymmdd, so its first four characters are the season.
  data$Season <- substr(data$Date, 1, 4)

  S <- summarize(group_by(data, Season),
                 Median=median(Attendence, na.rm=TRUE))
  S$Season <- as.numeric(S$Season)
  S
}
# Seasons to analyse: 1954 through 2013, i.e. the game log files
# gl1954.txt ... gl2013.txt and the "1954 - 2013" range shown in the
# plot titles.
seasons <- 1954:2013
# Per-season median home attendance for the Cubs ("CHN") and the
# White Sox ("CHA").
cubs <- get.data("CHN", seasons)
sox <- get.data("CHA", seasons)
library(ggplot2)
# Cubs: median home attendance per season, drawn as red points with a
# loess trend line on top.
ggplot(data = cubs, mapping = aes(x = Season, y = Median)) +
  geom_point(color = "red", size = 3) +
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size`
  # for lines; `size` is kept so the script behaves as before.
  geom_smooth(method = "loess", span = 0.25, size = 2) +
  ggtitle("Cubs Median Attendance: 1954 - 2013") +
  # Double the relative size of the plot title, axis titles and tick labels.
  theme(
    plot.title = element_text(size = rel(2)),
    axis.title = element_text(size = rel(2)),
    axis.text = element_text(size = rel(2))
  ) +
  # Shade the 1988-2013 seasons with a translucent yellow band.
  annotate("rect", xmin = 1988, xmax = 2013, ymin = 0, ymax = 45000,
           fill = "yellow", alpha = 0.2)
# White Sox: median home attendance per season, drawn as red points with
# a loess trend line on top.
ggplot(data = sox, mapping = aes(x = Season, y = Median)) +
  geom_point(color = "red", size = 3) +
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size`
  # for lines; `size` is kept so the script behaves as before.
  geom_smooth(method = "loess", span = 0.25, size = 2) +
  ggtitle("White Sox Median Attendance: 1954 - 2013") +
  # Double the relative size of the plot title, axis titles and tick labels.
  theme(
    plot.title = element_text(size = rel(2)),
    axis.title = element_text(size = rel(2)),
    axis.text = element_text(size = rel(2))
  )
# Ratio of the Cubs' median attendance to the White Sox' median
# attendance, season by season.
# merge() keeps the seasons present for both teams and suffixes the two
# Median columns: Median.x comes from cubs, Median.y from sox.
bothteams <- merge(x = cubs, y = sox, by = "Season")
bothteams$Ratio <- bothteams$Median.x / bothteams$Median.y
ggplot(data = bothteams, mapping = aes(x = Season, y = Ratio)) +
  geom_point(color = "red", size = 3) +
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size`
  # for lines; `size` is kept so the script behaves as before.
  geom_smooth(method = "loess", span = 0.25, size = 2) +
  ggtitle("Ratio of Cubs / Sox Attendance: 1954 - 2013") +
  # Double the relative size of the plot title, axis titles and tick labels.
  theme(
    plot.title = element_text(size = rel(2)),
    axis.title = element_text(size = rel(2)),
    axis.text = element_text(size = rel(2))
  ) +
  # Reference line at 1: the two teams' medians are equal there.
  geom_hline(yintercept = 1) +
  ylim(-0.5, 4.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment