Created
April 24, 2016 23:29
-
-
Save josep2/58eba839cdab29b516597f50a26ca7ed to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Work from the directory that holds baseballdata.csv; the relative paths
# passed to read.csv() and ggsave() in this script resolve against it.
# NOTE(review): this path is machine-specific -- adjust before running
# anywhere else.
setwd("/Users/josep2/Desktop/blogs/baseball/")

# Attach order is kept exactly as it was: packages attached later mask
# same-named functions from earlier ones (reshape2 after reshape, for example).
library(ggplot2)
library(dplyr)
library(lubridate)
library(scales)
library(grid)
library(RColorBrewer)
library(extrafont)
library(reshape)
library(ggthemes)
library(tidyr)
library(reshape2)
# Build the grey ggplot2 theme shared by the charts in this script.
#
# Takes no arguments. Returns a theme object (theme_bw() at base size 9 plus
# the overrides below) that is added to a plot with `+`.
# NOTE(review): the "Hack" font family must be known to the graphics device
# (e.g. registered through extrafont) -- confirm on the target machine.
my_theme <- function() {
  # Generate the colors for the chart procedurally with RColorBrewer
  greys <- brewer.pal(n = 9, name = "Greys")
  color_background <- greys[2]
  color_grid_major <- greys[3]
  color_axis_text <- greys[6]
  color_axis_title <- greys[7]
  color_title <- greys[9]

  # Every element is set exactly once, so a single theme() call is equivalent
  # to chaining one theme() per element.
  # `size` (rather than `linewidth`) is kept on element_line()/element_rect()
  # so the theme still works with ggplot2 releases older than 3.4.0.
  theme_bw(base_size = 9) +
    theme(
      # Set the entire chart region to a light gray color
      panel.background = element_rect(
        fill = color_background, color = color_background
      ),
      plot.background = element_rect(
        fill = color_background, color = color_background
      ),
      panel.border = element_rect(color = color_background),

      # Format the grid
      panel.grid.major = element_line(color = color_grid_major, size = .25),
      panel.grid.minor = element_blank(),
      axis.ticks = element_blank(),

      # Format the legend, but hide by default
      legend.position = "none",
      legend.background = element_rect(fill = color_background),
      legend.text = element_text(size = 7, color = color_axis_title),

      # Set title and axis labels, and format these and tick marks
      plot.title = element_text(
        color = color_title, size = 15, vjust = 1.25,
        face = "bold", family = "Hack"
      ),
      axis.text.x = element_text(size = 9, color = color_axis_text),
      axis.text.y = element_text(size = 7, color = color_axis_text),
      axis.title.x = element_text(
        size = 10, color = color_axis_title, vjust = 0,
        face = "bold", family = "Hack"
      ),
      axis.title.y = element_text(
        size = 10, color = color_axis_title, vjust = 1.25,
        face = "bold", family = "Hack"
      ),
      strip.text.y = element_text(
        size = 8, colour = "black", face = "bold", family = "Hack"
      ),
      strip.background = element_rect(
        fill = color_background, color = color_background, size = 1
      ),

      # Plot margins
      plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm")
    )
}
# Load the team data, keep the team name plus the four cost columns, and
# reshape them to long format (team, variable, value) for the faceted charts.
baseball_data <- read.csv("baseballdata.csv", header = TRUE)
baseball_cost_data <- select(
  baseball_data, team, ticket_price, hot_dog, beer, parking
)
# reshape and reshape2 are both attached and both define melt(); call the
# reshape2 version explicitly and name the id column instead of relying on
# masking order and on melt() guessing the id variables.
baseball_cost_data <- reshape2::melt(baseball_cost_data, id.vars = "team")
# Display labels for the facet strips, keyed by the cost column names that
# end up in the melted `variable` column.
facet_names <- c(
  "ticket_price" = "Ticket Price",
  "hot_dog" = "Hot Dog",
  "beer" = "Beer",
  "parking" = "Parking"
)
### Distribution of costs / histogram and density
# One histogram layer per cost type so each facet gets its own bin width;
# the facets use free scales.
ggplot() +
  geom_histogram(
    data = subset(baseball_cost_data, variable == "ticket_price"),
    mapping = aes(value),
    binwidth = 10, fill = "orange", alpha = .5
  ) +
  geom_histogram(
    data = subset(baseball_cost_data, variable == "hot_dog"),
    mapping = aes(value),
    binwidth = 1, fill = "orange", alpha = .5
  ) +
  geom_histogram(
    data = subset(baseball_cost_data, variable == "beer"),
    mapping = aes(value),
    binwidth = 1, fill = "orange", alpha = .5
  ) +
  geom_histogram(
    data = subset(baseball_cost_data, variable == "parking"),
    mapping = aes(value),
    binwidth = 5, fill = "orange", alpha = .5
  ) +
  facet_wrap(~variable, scales = "free", labeller = as_labeller(facet_names)) +
  my_theme() +
  scale_x_continuous(labels = scales::dollar) +
  labs(x = "Dollars", y = "Count", title = "Distribution of MLB Game Costs")
# ggsave() writes the most recent ggplot because no plot is passed explicitly.
ggsave("blog_vis1.png", dpi = 300, width = 10, height = 9)
### Concessions
# Hot dog and beer prices per team as horizontal bars, one facet per item.
ggplot() +
  geom_bar(
    data = subset(baseball_cost_data, variable %in% c("hot_dog", "beer")),
    mapping = aes(team, value),
    stat = "identity", fill = "purple", alpha = .5
  ) +
  facet_wrap(~variable, labeller = as_labeller(facet_names)) +
  coord_flip() +
  my_theme() +
  scale_y_continuous(labels = scales::dollar) +
  labs(x = "Team", y = "Dollars", title = "MLB Beer & Hot Dog Costs by Team")
# Interactive version of the chart above. Namespace-qualified because
# library(plotly) is only attached further down the script, so a bare
# ggplotly() call here fails with "could not find function".
plotly::ggplotly()
ggsave("blog_vis2.png", dpi = 300, width = 10, height = 9)
## Parking
# Horizontal bars of parking cost, with teams ordered by that cost.
ggplot(baseball_data, aes(x = reorder(team, parking), y = parking)) +
  geom_bar(stat = "identity", alpha = .5, fill = "#9999ff") +
  coord_flip() +
  labs(x = "Team", y = "Dollars", title = "Parking Costs By MLB Team") +
  scale_y_continuous(labels = scales::dollar, breaks = seq(0, 50, 5)) +
  my_theme()
ggsave("blog_vis3.png", dpi = 300, width = 10, height = 9)
### Tickets Prices
# Horizontal bars of ticket price, with teams ordered by that price.
# NOTE(review): the axis breaks stop at $50, the same as the parking chart;
# confirm no ticket_price exceeds that, otherwise the top of the axis is
# left without labels.
ggplot(baseball_data, aes(x = reorder(team, ticket_price), y = ticket_price)) +
  geom_bar(stat = "identity", alpha = .5, fill = "#00cc99") +
  coord_flip() +
  labs(x = "Team", y = "Dollars", title = "Ticket Costs By MLB Team") +
  scale_y_continuous(labels = scales::dollar, breaks = seq(0, 50, 5)) +
  my_theme()
ggsave("blog_vis4.png", dpi = 300, width = 10, height = 9)
## Populations / Median Income / Percent In Poverty (Plotly)
library(plotly)
# Scatter of county_population against median_income, coloured by
# percent_in_poverty, with the team name as hover text.
# Column mappings are written as formulas (~col): plotly >= 4.0 no longer
# evaluates bare column names inside the data frame, so the unquoted form
# fails with "object not found".
g <- plot_ly(
  baseball_data,
  x = ~county_population,
  y = ~median_income,
  text = ~paste("Team: ", team),
  color = ~percent_in_poverty,
  type = "scatter",
  mode = "markers"
)
# Upload the figure to the plotly web service under the given file name.
# NOTE(review): plotly_POST() is deprecated in plotly 4.x in favour of
# api_create(); it is kept here so the call and its arguments stay unchanged.
plotly_POST(g, filename = "median_income")
## Edit the rest in plotly UI
## Highest percent of pay check for Game + Hotdog + beer and parking
# Horizontal bars of the game_burden column, with teams ordered by it; the
# axis is formatted as a percentage.
ggplot(baseball_data, aes(x = reorder(team, game_burden), y = game_burden)) +
  geom_bar(stat = "identity", alpha = .75, fill = "#ffc019") +
  coord_flip() +
  labs(
    x = "Team",
    y = "Percent Of Check",
    title = "Ticket, Parking, Beer and Hotdog by Team"
  ) +
  scale_y_continuous(labels = scales::percent, breaks = seq(0, .08, .005)) +
  my_theme()
ggsave("blog_vis5.png", dpi = 300, width = 10, height = 9)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment