Last active
August 29, 2015 14:01
-
-
Save bayesball/9d73c2c15f8c4f6d0d39 to your computer and use it in GitHub Desktop.
Graphs of Cubs and White Sox Median Attendance for Recent Seasons
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cubs / White Sox attendance study
# assumes that the Retrosheet game log files
# gl1900.txt, gl1901.txt, ... are
# contained in the folder gamelogs, together with game_log_header.csv
# the files gl1964.txt through gl2013.txt are used in this code
# Median home attendance per season for one team.
#
# Arguments:
#   team    - team code compared against the HomeTeam column (e.g. "CHN")
#   Seasons - vector of years; gamelogs/gl<year>.txt is read for each one
#
# Returns the dplyr summary table: one row per season with the columns
# Season (numeric) and Median (median of the Attendence column).
#
# Needs the dplyr package and gamelogs/game_log_header.csv, whose column
# names are applied to the header-less game log files.
get.data <- function(team, Seasons) {
  # fail loudly up front; require() would only return FALSE and let the
  # function die later with an unrelated-looking error
  if (!requireNamespace("dplyr", quietly = TRUE)) {
    stop("get.data() needs the 'dplyr' package", call. = FALSE)
  }
  if (length(Seasons) == 0) {
    stop("'Seasons' must contain at least one year", call. = FALSE)
  }

  # the header file is the same for every season, so read it only once
  headers <- read.csv(file.path("gamelogs", "game_log_header.csv"))
  col_names <- names(headers)

  # read one season's game log and keep only the team's home games
  read_home_games <- function(year) {
    games <- read.csv(
      file.path("gamelogs", paste0("gl", year, ".txt")),
      header = FALSE
    )
    names(games) <- col_names
    # which() drops NA comparisons, matching what subset() would do
    games[which(games[["HomeTeam"]] == team), , drop = FALSE]
  }

  # bind all seasons in one step instead of growing a data frame in a loop
  data <- do.call(rbind, lapply(Seasons, read_home_games))

  # the first four characters of Date are taken as the season year
  data$Season <- substr(data$Date, 1, 4)

  # "Attendence" must match the column name supplied by
  # game_log_header.csv -- presumably spelled this way there; confirm
  # before "correcting" it. median() is called without na.rm, so a missing
  # attendance value makes that season's median NA.
  S <- dplyr::summarize(
    dplyr::group_by(data, Season),
    Median = median(Attendence)
  )
  S$Season <- as.numeric(S$Season)
  S
}
# per-season median home attendance for each Chicago team;
# "CHN" and "CHA" are the HomeTeam codes matched inside get.data
cubs <- get.data(team = "CHN", Seasons = 1964:2013)
sox <- get.data(team = "CHA", Seasons = 1964:2013)
library(ggplot2)

# Cubs: median home attendance by season with a loess trend line.
# The year span in the title is taken from the plotted data, so the label
# always agrees with the seasons that were actually loaded (the previous
# hard-coded "1954 - 2013" did not).
cubs_span <- paste(range(cubs$Season), collapse = " - ")
ggplot(cubs, aes(Season, Median)) +
  geom_point(size = 3, color = "red") +
  geom_smooth(method = "loess", span = 0.25, size = 2) +
  labs(title = paste0("Cubs Median Attendance: ", cubs_span)) +
  theme(
    plot.title = element_text(size = rel(2)),
    axis.title = element_text(size = rel(2)),
    axis.text = element_text(size = rel(2))
  ) +
  # translucent yellow band highlighting seasons 1988 through 2013
  annotate("rect", xmin = 1988, xmax = 2013, ymin = 0, ymax = 45000,
           alpha = 0.2, fill = "yellow")
# White Sox: median home attendance by season with a loess trend line.
# The year span in the title is taken from the plotted data, so the label
# always agrees with the seasons that were actually loaded (the previous
# hard-coded "1954 - 2013" did not).
sox_span <- paste(range(sox$Season), collapse = " - ")
ggplot(sox, aes(Season, Median)) +
  geom_point(size = 3, color = "red") +
  geom_smooth(method = "loess", span = 0.25, size = 2) +
  labs(title = paste0("White Sox Median Attendance: ", sox_span)) +
  theme(
    plot.title = element_text(size = rel(2)),
    axis.title = element_text(size = rel(2)),
    axis.text = element_text(size = rel(2))
  )
# look at the ratio of median Cubs attendance to
# median Sox attendance, season by season
# merge() keeps the seasons present in both tables and suffixes the two
# Median columns: Median.x comes from cubs, Median.y from sox
bothteams <- merge(cubs, sox, by = "Season")
bothteams$Ratio <- bothteams$Median.x / bothteams$Median.y

# the year span in the title is taken from the merged data, so the label
# always agrees with the seasons actually plotted (the previous
# hard-coded "1954 - 2013" did not)
ratio_span <- paste(range(bothteams$Season), collapse = " - ")
ggplot(bothteams, aes(Season, Ratio)) +
  geom_point(size = 3, color = "red") +
  geom_smooth(method = "loess", span = 0.25, size = 2) +
  labs(title = paste0("Ratio of Cubs / Sox Attendance: ", ratio_span)) +
  theme(
    plot.title = element_text(size = rel(2)),
    axis.title = element_text(size = rel(2)),
    axis.text = element_text(size = rel(2))
  ) +
  # reference line at 1: both teams have the same median attendance
  geom_hline(yintercept = 1) +
  ylim(-.5, 4.5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment