Skip to content

Instantly share code, notes, and snippets.

@patternproject
Last active June 28, 2016 12:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save patternproject/cecdcf5a669f0d6269bb5726f9649340 to your computer and use it in GitHub Desktop.
Save patternproject/cecdcf5a669f0d6269bb5726f9649340 to your computer and use it in GitHub Desktop.
#### Dependencies ####
# ggplot2 supplies the plotting functions used below; dplyr supplies the
# data-frame filter() (without it, filter() resolves to stats::filter()).
library(ggplot2)
library(dplyr)

#### Load Data ####
# The data file contains no commas (its columns are tab-separated), so the
# separator has to be given explicitly: read.csv() defaults to sep = ",".
df.winners <- read.csv(
  "winnersWithMean.csv",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE,
  na.strings = c("", "NA")
)
# dput for df.winners
# at the end of the code

#### Manipulate Data ####
# making genre a factor
df.winners$genre <- as.factor(df.winners$genre)

# For plotting, store the year as a Date (else it is treated as an integer).
# Parsing with format = "%Y" alone fills in the month and day from the day the
# script is run, so every point is pinned to 1 July instead: reproducible, and
# mid-year, which is where the hand-placed annotations expect the points.
df.winners$year2 <- as.Date(
  paste0(df.winners$year, "-07-01"),
  format = "%Y-%m-%d"
)
## colour palette ####
# Named colour constants: hex codes, plus one built-in R colour name.
col.grey      <- "#707070"
col.teal      <- "#368C8C"
col.blue      <- "#4682B4"
col.mid.green <- "#98EFC1"
col.lig.green <- "#B8FFD1"
col.dark.red  <- "darkred"
#### Visualize Data ####
## g.1 ##
# Base plot: for each genre, a pale green line through the yearly means with
# semi-transparent points drawn on top (blue for COMEDY, dark red for DRAMA).
g.1 <- ggplot(data = df.winners) +
  theme_minimal() +
  # COMEDY: line first, then points
  geom_line(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('COMEDY', genre)),
    alpha = 0.8, size = 0.5, col = col.lig.green
  ) +
  geom_point(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('COMEDY', genre)),
    alpha = 0.6, size = 2, col = col.blue
  ) +
  # DRAMA: line first, then points
  geom_line(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('DRAMA', genre)),
    alpha = 0.8, size = 0.5, col = col.lig.green
  ) +
  geom_point(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('DRAMA', genre)),
    alpha = 0.4, size = 2, col = col.dark.red
  )
# g.2: g.1 with the panel background, vertical grid lines, minor horizontal
# grid lines and axis ticks blanked, plus text sizes and margins adjusted.
g.2 <- g.1 +
  theme(
    # blank out panel decoration and tick marks
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    # axis titles
    axis.title.y = element_text(size = 8),
    axis.title.x = element_text(
      margin = margin(t = 0, r = 0, b = -10, l = 0),
      vjust = -5
    ),
    # subtitle/caption styling; src:
    # https://rud.is/b/2016/06/16/your-data-vis-spidey-sense-the-need-for-a-robust-utility-belt/
    plot.subtitle = element_text(size = 9.5, margin = margin(b = 10)),
    plot.caption = element_text(size = 7, margin = margin(t = -10)),
    # margin around the entire plot (top, right, bottom, left)
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )
# Plot text: both axis titles are dropped; the title and subtitle carry the
# description of what is plotted.
g.2 <- g.2 + labs(
  x = NULL,
  y = NULL,
  title = "How viewership varies for Emmy Winners",
  subtitle = "Mean of \"US Viewers Per Episode\", in Millions across the years (Wikipedia)"
  # caption is left out because it overlaps the x axis ----> PROBLEM 1
  # caption = "Source: Wikipedia"
)
# X axis: one break per year from 2004 to 2016; only every other year is
# labelled (two-digit form after the first).
g.2 <- g.2 +
  scale_x_date(
    breaks = seq(as.Date("2004/1/1"), as.Date("2016/1/1"), by = "years"),
    labels = c("2004", "", "06", "", "08", "", "10", "", "12", "", "14", "", "16")
    # using expand = c(0,0) # clips the graph ----> PROBLEM 2
  )

# Y axis fixed to the range 0-20. The note about expand stays on its own line
# so that the call's closing parenthesis is not swallowed by the comment.
g.2 <- g.2 +
  scale_y_continuous(
    limits = c(0, 20)
    # using expand = c(0,0) # clips the graph ----> PROBLEM 3
  )
# Hand-placed annotations: one label per genre near the left edge, then a text
# label and a large, faint grey circle for each of the three cross-over points.
# Every layer uses an empty data frame together with constant aesthetics.
g.2 <- g.2 +
  # label for genre DRAMA
  geom_text(
    mapping = aes(x = as.Date("2004/2/1"), y = 10, label = "DRAMA Genre"),
    data = data.frame(), hjust = 0, size = 2,
    col = col.dark.red, alpha = 0.4, fontface = "bold"
  ) +
  # label for genre COMEDY
  geom_text(
    mapping = aes(x = as.Date("2004/2/1"), y = 5, label = "COMEDY Genre"),
    data = data.frame(), hjust = 0, size = 2,
    col = col.blue, alpha = 0.6, fontface = "bold"
  ) +
  # cross-over point 1: label, then highlight circle
  geom_text(
    mapping = aes(x = as.Date("2005/12/1"), y = 19, label = "Cross-over \n Point 1"),
    data = data.frame(), hjust = 0, size = 2
  ) +
  geom_point(
    mapping = aes(x = as.Date("2005/7/1"), y = 19),
    data = data.frame(), alpha = 0.1, size = 8, col = col.grey
  ) +
  # cross-over point 2: label, then highlight circle
  geom_text(
    mapping = aes(x = as.Date("2007/9/1"), y = 4.3, label = "Cross-over \n Point 2"),
    data = data.frame(), hjust = 0, size = 2
  ) +
  geom_point(
    mapping = aes(x = as.Date("2007/12/1"), y = 5.7),
    data = data.frame(), alpha = 0.1, size = 8, col = col.grey
  ) +
  # cross-over point 3: label, then highlight circle
  geom_text(
    mapping = aes(x = as.Date("2014/1/1"), y = 5.3, label = "Cross-over \n Point 3"),
    data = data.frame(), hjust = 0, size = 2
  ) +
  geom_point(
    mapping = aes(x = as.Date("2015/1/1"), y = 5.7),
    data = data.frame(), alpha = 0.1, size = 8, col = col.grey
  )
# Thick dark-grey baseline at y = 0; src:
# http://t-redactyl.io/blog/2016/01/creating-plots-in-r-using-ggplot2-part-3-bar-plots.html
g.2 <- g.2 +
  geom_hline(yintercept = 0, size = 1.2, colour = "#535353")

# show the finished plot
g.2
#### DPUT for DATA ####
# Output of dput(df.winners):
structure(list(year = c(2015L, 2015L, 2014L, 2014L, 2013L, 2013L,
2012L, 2012L, 2011L, 2011L, 2010L, 2010L, 2009L, 2009L, 2008L,
2008L, 2007L, 2007L, 2006L, 2006L, 2005L, 2005L, 2004L, 2004L
), name = c("GAME OF THRONES ", "VEEP", "BREAKING BAD", "MODERN FAMILY",
"BREAKING BAD", "MODERN FAMILY", "HOMELAND", "MODERN FAMILY",
"MAD MEN", "MODERN FAMILY", "MAD MEN", "MODERN FAMILY", "MAD MEN",
"30 ROCK", "MAD MEN", "30 ROCK", "THE SOPRANOS", "30 ROCK", "24",
"THE OFFICE", "LOST", "EVERYBODY LOVES RAYMOND", "THE SOPRANOS",
"ARRESTED DEVELOPMENT "), genre = c("DRAMA", "COMEDY", "DRAMA",
"COMEDY", "DRAMA", "COMEDY", "DRAMA", "COMEDY", "DRAMA", "COMEDY",
"DRAMA", "COMEDY", "DRAMA", "COMEDY", "DRAMA", "COMEDY", "DRAMA",
"COMEDY", "DRAMA", "COMEDY", "DRAMA", "COMEDY", "DRAMA", "COMEDY"
), mean.us.viewers.m = c(6.85, 0.96, 4.32, 9.91, 1.87, 11.06,
1.92, 12.09, 2.27, 11.15, 1.82, 9.37, 1.52, 6.94, 1.21, 6.09,
8.18, 5.58, 12.14, 8.17, 18.91, 19.1, 10.95, 6.24), year2 = structure(c(16614,
16614, 16249, 16249, 15884, 15884, 15519, 15519, 15153, 15153,
14788, 14788, 14423, 14423, 14058, 14058, 13692, 13692, 13327,
13327, 12962, 12962, 12597, 12597), class = "Date")), .Names = c("year",
"name", "genre", "mean.us.viewers.m", "year2"), row.names = c(NA,
-24L), class = "data.frame")
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
year name genre mean.us.viewers.m
2015 GAME OF THRONES DRAMA 6.85
2015 VEEP COMEDY 0.96
2014 BREAKING BAD DRAMA 4.32
2014 MODERN FAMILY COMEDY 9.91
2013 BREAKING BAD DRAMA 1.87
2013 MODERN FAMILY COMEDY 11.06
2012 HOMELAND DRAMA 1.92
2012 MODERN FAMILY COMEDY 12.09
2011 MAD MEN DRAMA 2.27
2011 MODERN FAMILY COMEDY 11.15
2010 MAD MEN DRAMA 1.82
2010 MODERN FAMILY COMEDY 9.37
2009 MAD MEN DRAMA 1.52
2009 30 ROCK COMEDY 6.94
2008 MAD MEN DRAMA 1.21
2008 30 ROCK COMEDY 6.09
2007 THE SOPRANOS DRAMA 8.18
2007 30 ROCK COMEDY 5.58
2006 24 DRAMA 12.14
2006 THE OFFICE COMEDY 8.17
2005 LOST DRAMA 18.91
2005 EVERYBODY LOVES RAYMOND COMEDY 19.1
2004 THE SOPRANOS DRAMA 10.95
2004 ARRESTED DEVELOPMENT COMEDY 6.24
@hrbrmstr
Copy link

#### Dependencies ####
# ggplot2 supplies the plotting functions used below; dplyr supplies the
# data-frame filter() (without it, filter() resolves to stats::filter()).
library(ggplot2)
library(dplyr)

#### Load Data ####
# The file is read as tab-separated text; empty strings and "NA" become NA.
df.winners <- read.csv(
  "winnersWithMean.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = c("", "NA"),
  sep = "\t"
)

# dput for df.winners
# at the end of the code

#### Manipulate Data ####

# making genre a factor
df.winners$genre <- as.factor(df.winners$genre)

# For plotting, store the year as a Date (else it is treated as an integer).
# Parsing with format = "%Y" alone fills in the month and day from the day the
# script is run, so every point is pinned to 1 July instead: reproducible, and
# mid-year, which is where the hand-placed annotations expect the points.
df.winners$year2 <- as.Date(
  paste0(df.winners$year, "-07-01"),
  format = "%Y-%m-%d"
)

## colour palette ####
# Named colour constants: hex codes, plus one built-in R colour name.
col.grey      <- "#707070"
col.teal      <- "#368C8C"
col.blue      <- "#4682B4"
col.mid.green <- "#98EFC1"
col.lig.green <- "#B8FFD1"
col.dark.red  <- "darkred"


#### Visualize Data ####

## g.1 ##

# Base plot: for each genre, a pale green line through the yearly means with
# semi-transparent points drawn on top (blue for COMEDY, dark red for DRAMA).
g.1 <- ggplot(data = df.winners) +
  theme_minimal() +
  # COMEDY: line first, then points
  geom_line(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('COMEDY', genre)),
    alpha = 0.8, size = 0.5, col = col.lig.green
  ) +
  geom_point(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('COMEDY', genre)),
    alpha = 0.6, size = 2, col = col.blue
  ) +
  # DRAMA: line first, then points
  geom_line(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('DRAMA', genre)),
    alpha = 0.8, size = 0.5, col = col.lig.green
  ) +
  geom_point(
    mapping = aes(x = year2, y = mean.us.viewers.m),
    data = filter(df.winners, grepl('DRAMA', genre)),
    alpha = 0.4, size = 2, col = col.dark.red
  )

# g.2: g.1 with the panel background, vertical grid lines, minor horizontal
# grid lines and axis ticks blanked, plus text sizes and margins adjusted.
g.2 <- g.1 +
  theme(
    # blank out panel decoration and tick marks
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    # axis titles
    axis.title.y = element_text(size = 8),
    axis.title.x = element_text(
      margin = margin(t = 0, r = 0, b = -10, l = 0),
      vjust = -5
    ),
    # subtitle styling; src:
    # https://rud.is/b/2016/06/16/your-data-vis-spidey-sense-the-need-for-a-robust-utility-belt/
    plot.subtitle = element_text(size = 9.5, margin = margin(b = 10)),
    # SOLUTION for caption overlap: top margin changed from -10 to 10
    plot.caption = element_text(size = 7, margin = margin(t = 10)),
    # margin around the entire plot (top, right, bottom, left)
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

# Plot text: both axis titles are dropped; title, subtitle and a source
# caption describe what is plotted.
g.2 <- g.2 + labs(
  x = NULL,
  y = NULL,
  title = "How viewership varies for Emmy Winners",
  subtitle = "Mean of \"US Viewers Per Episode\", in Millions across the years (Wikipedia)",
  caption = "Source: Wikipedia"
)

# SOLUTION for X: when expand = c(0, 0) is used, set the scale limits by hand.
# Breaks fall on every year from 2004 to 2016; every other one is labelled.
g.2 <- g.2 +
  scale_x_date(
    breaks = seq(as.Date("2004/1/1"), as.Date("2016/1/1"), by = "years"),
    labels = c("2004", "", "06", "", "08", "", "10", "", "12", "", "14", "", "16"),
    limits = as.Date(c("2004-01-01", "2016-01-21")),
    expand = c(0, 0)
  )

# SOLUTION for Y: likewise, pair expand = c(0, 0) with larger manual limits.
g.2 <- g.2 +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 21.5))

# Hand-placed annotations: one label per genre near the left edge, then a text
# label and a large, faint grey circle for each of the three cross-over points.
# Every layer uses an empty data frame together with constant aesthetics.
g.2 <- g.2 +
  # label for genre DRAMA
  geom_text(
    mapping = aes(x = as.Date("2004/2/1"), y = 10, label = "DRAMA Genre"),
    data = data.frame(), hjust = 0, size = 2,
    col = col.dark.red, alpha = 0.4, fontface = "bold"
  ) +
  # label for genre COMEDY
  geom_text(
    mapping = aes(x = as.Date("2004/2/1"), y = 5, label = "COMEDY Genre"),
    data = data.frame(), hjust = 0, size = 2,
    col = col.blue, alpha = 0.6, fontface = "bold"
  ) +
  # cross-over point 1: label, then highlight circle
  geom_text(
    mapping = aes(x = as.Date("2005/12/1"), y = 19, label = "Cross-over \n Point 1"),
    data = data.frame(), hjust = 0, size = 2
  ) +
  geom_point(
    mapping = aes(x = as.Date("2005/7/1"), y = 19),
    data = data.frame(), alpha = 0.1, size = 8, col = col.grey
  ) +
  # cross-over point 2: label, then highlight circle
  geom_text(
    mapping = aes(x = as.Date("2007/9/1"), y = 4.3, label = "Cross-over \n Point 2"),
    data = data.frame(), hjust = 0, size = 2
  ) +
  geom_point(
    mapping = aes(x = as.Date("2007/12/1"), y = 5.7),
    data = data.frame(), alpha = 0.1, size = 8, col = col.grey
  ) +
  # cross-over point 3: label, then highlight circle
  geom_text(
    mapping = aes(x = as.Date("2014/1/1"), y = 5.3, label = "Cross-over \n Point 3"),
    data = data.frame(), hjust = 0, size = 2
  ) +
  geom_point(
    mapping = aes(x = as.Date("2015/1/1"), y = 5.7),
    data = data.frame(), alpha = 0.1, size = 8, col = col.grey
  )

# Thick dark-grey baseline at y = 0; src:
# http://t-redactyl.io/blog/2016/01/creating-plots-in-r-using-ggplot2-part-3-bar-plots.html
g.2 <- g.2 +
  geom_hline(yintercept = 0, size = 1.2, colour = "#535353")

# Suggested addition: a label for the y-axis unit inside the plot, drawn with
# no border and no padding. (The same wording is already in the subtitle, but
# in eye-tracking studies most people look at the chart first.)
g.2 <- g.2 +
  geom_label(
    mapping = aes(
      x = as.Date("2004/01/01"), y = 21,
      label = "Viewers per ep (millions)"
    ),
    data = data.frame(), hjust = 0, size = 2,
    label.size = 0, label.padding = unit(0, "null")
  )

# Axis tick labels: per-label justification (first x label hjust = 0, last
# hjust = 1, the rest 0.5; first y label vjust = 0, the rest 0.5), each with a
# negative margin on the side facing the panel.
g.2 <- g.2 +
  theme(
    axis.text.x = element_text(
      hjust = c(0, rep(0.5, 11), 1),
      margin = margin(t = -10)
    ),
    axis.text.y = element_text(
      vjust = c(0, rep(0.5, 4)),
      margin = margin(r = -10)
    )
  )

# show the finished plot
g.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment