Skip to content

Instantly share code, notes, and snippets.

@mrecos
Last active March 31, 2016 04:52
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrecos/66edff53f61a48f3d72d to your computer and use it in GitHub Desktop.
Save mrecos/66edff53f61a48f3d72d to your computer and use it in GitHub Desktop.
Code for post about creating a dataviz using new ggplot2 features and cowplot. post" http://matthewdharris.com/2016/03/19/visualizing-zubrow-ggplot2-cowplot/ tweet: https://twitter.com/Md_Harris/status/710639632711618560
library("ggplot2") # Must use Dev version as of 03/18/16
library("gridExtra")
library("extrafont") # for font selection
library("dplyr") # for data preperation
library("cowplot") # for combining plots
# Prepare data for plotting
# data from Zubrow, E.B.W. (1974), Population, Contact,and Climate in the New Mexican Pueblos
# prepared as a long format to facilitate plotting
year <- c(1760, 1790, 1797, 1850, 1860, 1889, 1900, 1910, 1950)
sites <- c("Isleta", "Acoma", "Laguna", "Zuni", "Sandia", "San Felipe",
"Santa Ana", "Zia", "Santo Domingo", "Jemez", "Cochiti",
"Tesuque", "Nambe", "San Ildefonso", "Pojoaque", "Santa Clara",
"San Juan", "Picuris", "Toas")
# [sites] are arranged in South to North geographic order
south <- c(seq_along(sites))
# add index to rearrange geographically from East to West
east <- c(14, 18, 17, 19, 12, 11, 13, 15, 10, 16, 9, 4, 3, 7, 5, 8, 6, 2, 1)
# population figures
pop <- c(
c(304, 410, 603, 751, 440, 1037, 1035, 956, 1051),
c(1052, 820, 757, 367, 523, 582, 492, 691, 1376),
c(600, 668, 802, 749, 929, 970, 1077, 1472, 1655),
c(664, 1935, 2716, 1294, 1300, 1547, 1525, 1667, 2564),
c(291, 304, 116, 241, 217, 150, 81, 73, 150),
c(458, 532, 282, 800, 360, 501, 515, 502, 721),
c(404, 356, 634, 339, 316, 264, 228, 219, 285),
c(568, 275, 262, 124, 115, 113, 115, 109, 145),
c(424, 650, 483, 666, 262, 930, 771, 817, 978),
c(373, 485, 272, 365, 650, 474, 452, 449, 789),
c(450, 720, 505, 254, 172, 300, 247, 237, 289),
c(232, 138, 155, 119, 97, 94, 80, 80, 145),
c(204, 155, 178, 107, 107, 80, 81, 88, 96),
c(484, 240, 251, 319, 166, 189, 137, 114, 152),
c(99, 53, 79, 48, 37, 18, 12, 16, 2),
c(257, 134, 193, 279, 179, 187, 222, 243, 511),
c(316, 260, 202, 568, 343, 373, 422, 388, 152),
c(328, 254, 251, 222, 143, 120, 95, 104, 99),
c(505, 518, 531, 361, 363, 324, 462, 517, 842)
)
# combine above in a data.frame where each population is the key observation
dat <- data.frame(Year = rep(year, length(sites)),
Site = rep(sites, each = length(year)),
Population = pop,
South = rep(south, each = length(year)),
East = rep(east, each = length(year)))
# add a factor that arranges the levels of site by their East to West geographic location
dat$East_label <- rep(factor(sites, levels = (sites[order(east)])), each = length(year))
# Calculate quantities of interest
# this ibe-liner calculates the date to date change in population
# ave() groups and runs the diff() function that is concatenated with a 0 for the first observation in each group
# probably can make this dplyr, but it works...
dat$Change <- ave(dat$Population, dat$Site, FUN = function(x) c(0, diff(x)))
# using dplyr to group the observations by site/Pueblo and calcualte stuff
dat <- group_by(dat, Site) %>%
# change from inception date of 1760 (first data point)
mutate(Relative_Change = Population - Population[1]) %>%
# percent change of log population (not used in this graphic)
mutate(Percent_Change = (Population - lag(Population))/lag(Population)) %>%
# change from inception scaled to % change from inception
mutate(Scaled_Change = (Relative_Change / Population[1])) %>%
# make into a data.frame
data.frame()
# select only the rows that are the last time period observation of 1950
# this is for the second plot
net_change <- dat[which(dat$Year == 1950),]
# plotting with ggplot2 (dev version from github)
# I started doing the [p1 <- p1 +] way to build these b/c others did and
# I liked how I could add selective lines while testing without changing anything
# so, it is more things to type, but whatever. It can be done in one ggplot() call if you like
# also, I code to get stuff done then optimize later. You may likey find ways to do this better
# Plot #1: Percent change from inception for each site
# set up data and axis
p1 <- ggplot(dat, aes(x = as.factor(Year), y = Scaled_Change, group = East))
# this is the bar plot part
p1 <- p1 + geom_bar(stat="identity", fill = "skyblue", color = "white")
# this is the redline outlining the bars
p1 <- p1 + geom_line(color = "firebrick1", alpha = 0.95)
# wrap it by the reordered factor so each individual site plots
p1 <- p1 + facet_wrap( ~ East_label, nrow = 2)
# add a reference line at zero
p1 <- p1 + geom_hline(yintercept = 0, linetype = 1, color = "gray35", size = 0.5)
# scale the y axis and calculate it as a percentage
p1 <- p1 + scale_y_continuous(breaks = c(seq(-1,3,1)), labels = scales::percent)
# thin out the dates in the x axis
p1 <- p1 + scale_x_discrete(breaks = c(1760, 1797, 1860, 1900, 1950))
# trusty old theme_bw() to prepare the look of the plot
p1 <- p1 + theme_bw()
# fine suggestion by @ClausWilke to make plot less busy
p1 <- p1 + background_grid(major = "xy", minor = "none")
# add the title, subtitle, and axis labels
# this is a new feature only available in the dev version of ggplot2
p1 <- p1 + labs(title="Percent Population Change from in New Mexico Pueblos from 1760 to 1950",
subtitle="Arranged from East to West",
x = "Year",
y = "Percent Population Change")
# all the things in the theme() section that stylize the plot and fonts
p1 <- p1 + theme(
strip.background = element_rect(colour = "white", fill = "white"),
strip.text.x = element_text(colour = "black", size = 7, face = "bold",
family = "Trebuchet MS"),
panel.margin = unit(0.3, "lines"),
panel.border = element_rect(colour = "gray90"),
axis.text.x = element_text(angle = 90, size = 6, family = "Trebuchet MS"),
axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
# the three elements below are all new to the dev ersion
plot.caption = element_text(size = 8, hjust=0, margin=margin(t=5),
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic")
)
# Plot #2: Net change in population by site
# this uses a different data.frame from plot #1
# most of this is all the same as Plot #1, but not faceted.
# one trick here is to have two geom_bar calls, but the second is just data below the 0 line
p2 <- ggplot(data = net_change, aes(x = East_label, y = Scaled_Change, group = Year))
# first call to geom_bar for all data
p2 <- p2 + geom_bar(stat="identity", fill = "skyblue")
# second call to geom_bar for just negative data with a different color
p2 <- p2 + geom_bar(data = net_change[which(net_change$Scaled_Change < 0),],
stat="identity", fill = "skyblue4")
# a spline smooth to add the fit that shows the trend of population flow
p2 <- p2 + geom_smooth(data = transform(net_change),
method = "lm", formula = y ~ splines::bs(x, 3),
se = FALSE, color = "firebrick1", size = 0.5)
p2 <- p2 + geom_hline(yintercept = 0, linetype = 1, color = "gray35", size = 0.5)
p2 <- p2 + scale_y_continuous(breaks = c(seq(-1,3,0.5)), labels = scales::percent)
p2 <- p2 + theme_bw()
# fine suggestion by @ClausWilke to make plot less busy
p2 <- p2 + background_grid(major = "xy", minor = "none")
# same new features as described in plot #1
p2 <- p2 + labs(title="Net Percent Population Change from 1760 to 1950",
subtitle="Arranged from East to West",
x = NULL,
y = "Percent Population Change",
caption="Data: Zubrow, E.B.W. (1974), Population, Contact,and Climate in the New Mexican Pueblos")
p2 <- p2 + theme(
axis.text.x = element_text(angle = 90, size = 8, hjust = 1, family = "Trebuchet MS"),
axis.text.y = element_text(size = 7, family = "Trebuchet MS"),
axis.title = element_text(size = 8, family = "Trebuchet MS"),
# same new features as described in plot #1
plot.caption = element_text(size = 8, hjust=0, margin=margin(t=5),
family = "Trebuchet MS"),
plot.title=element_text(family="TrebuchetMS-Bold"),
plot.subtitle=element_text(family="TrebuchetMS-Italic")
)
# now the magic of cowplot to combine the plots
# plot_grid does the arranging of p1 and p2
# [labels] gives identification to each plot for your figure caption
# [nrow] arrnges them in stacked rows
# [rel_heights] is really important. This allows for the top plot to be made larger (1.75 times)
# than the lower plot. If your plots are not realtively equal in size, you may need this
# I adjusted by trial and error
cplot1 <- plot_grid(p1, p2, labels = c("A", "B"), nrow = 2, rel_heights = c(1.75,1))
# get an idea of what it looks like
plot(cplot1)
# save cowplot as an image!
# [base_width] and [base_height] allow of sizing the fina image Use in conjunction with [rel_heights]
save_plot("cowplot.png", cplot1, base_width = 8, base_height = 7.5)
# Done!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment