Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Year-by-year 1-column comparison of timeseries data with facet_wrap
#!/usr/bin/env Rscript
library(ggplot2)
library(stringr)
library(gridExtra)
library(scales)
# Trailing command-line arguments of the script. Further down, args[1] is
# read as the comma-separated input file (and used as the prefix of the PNG
# output names) and args[2] is used as the plot title.
args <- commandArgs(trailingOnly = TRUE)
format_si <- function(...) {
  # Build a label formatter that renders numeric values with the prefixes of
  # the International System of Units.
  # http://en.wikipedia.org/wiki/SI_prefix
  #
  # Based on code by Ben Tupper
  # https://stat.ethz.ch/pipermail/r-help/2012-January/299804.html
  #
  # Args:
  #   ...: Extra arguments forwarded to format().
  #
  # Returns:
  #   A function that takes a numeric vector and returns a character vector
  #   of the same length. Each value is divided by the largest power of 1000
  #   not exceeding its magnitude, rounded to one decimal and followed by a
  #   space and the matching prefix ("1.5 M"). Values below 1e-24 in
  #   magnitude (including 0) and infinite values get the blank unit prefix.
  #   NA / NaN inputs yield NA_character_ instead of the text "NA NA".
  #
  # Usage:
  #   scale_y_continuous(labels = format_si()) +
  #
  # The lookup tables are constant, so they are built once per formatter
  # rather than on every call.
  limits <- c(1e-24, 1e-21, 1e-18, 1e-15, 1e-12,
              1e-9, 1e-6, 1e-3, 1e0, 1e3,
              1e6, 1e9, 1e12, 1e15, 1e18,
              1e21, 1e24)
  prefix <- c("y", "z", "a", "f", "p",
              "n", "µ", "m", " ", "k",
              "M", "G", "T", "P", "E",
              "Z", "Y")
  # Position of the "no prefix" entry (1e0 -> " ").
  unit_idx <- match(1e0, limits)

  function(x) {
    # Index of the interval each magnitude falls into; 0 means "< 1e-24",
    # NA means the input itself was NA / NaN.
    idx <- findInterval(abs(x), limits)
    # Tiny, missing and infinite values are not scaled at all. The mask never
    # contains NA because `TRUE | NA` is TRUE.
    idx[!is.finite(x) | idx == 0L] <- unit_idx

    labels <- paste(
      format(round(x / limits[idx], 1),
             trim = TRUE, scientific = FALSE, ...),
      prefix[idx]
    )
    # Missing values stay missing rather than becoming a literal "NA" label.
    labels[is.na(x)] <- NA_character_
    labels
  }
}
##
# Inputs
#
# args[1]: path to a comma-separated file with a header row; the `time` and
#          `count` columns are used below.
# args[2]: optional plot title (no title is drawn when it is omitted).
#
if (length(args) < 1L) {
  stop("usage: Rscript <script> <input.csv> [plot title]", call. = FALSE)
}
input_path <- args[1]
plot_title <- if (length(args) >= 2L) args[2] else NULL

Y <- read.csv(input_path, header = TRUE)
stopifnot(all(c("time", "count") %in% names(Y)))
Y$date <- as.POSIXct(Y$time)

# Dark panel styling shared by both figures. Written with theme()/element_*():
# the older opts()/theme_rect()/theme_line() API no longer exists in ggplot2.
dark_panel <- theme(
  legend.position = "none",
  panel.background = element_rect(fill = "#545454"),
  panel.grid.major = element_line(colour = "#757575"),
  panel.grid.minor = element_line(colour = "#757575")
)

##
# full time series in a single panel -> <input>.png
#
png(filename = paste0(input_path, ".png"),
    width = 550, height = 300, units = "px")
print(
  ggplot(data = Y) +
    geom_line(aes(date, count), color = "#e56d25") +
    scale_y_continuous(labels = format_si()) +
    scale_x_datetime(
      limits = c(as.POSIXct("2007-01-01"), as.POSIXct("2012-09-01"))
    ) +
    xlab("Date") +
    ylab("Tweets per Day") +
    ggtitle(plot_title) +
    dark_panel
)
dev.off()

##
# year over year comparison with facet wrap -> <input>.year.png
#
# simulate dates in single year (2000 in this case),
# but give them yr factors for facet
# use custom formatting
#
time_parts <- as.POSIXlt(Y$time)  # broken-down time, computed once
Y$Yr <- as.factor(time_parts$year + 1900)
Y$Mn <- as.factor(1 + time_parts$mon)
Y$Dy <- as.factor(time_parts$mday)
# use dates for easier plotting; 2000 is a leap year, so Feb 29 is valid.
# The explicit format turns unparseable rows into NA instead of an error.
Y$doy <- as.Date(paste("2000", Y$Mn, Y$Dy, sep = "/"), format = "%Y/%m/%d")

month_breaks <- seq(min(Y$doy, na.rm = TRUE),
                    max(Y$doy, na.rm = TRUE),
                    by = "month")

png(filename = paste0(input_path, ".year.png"),
    width = 550, height = 800, units = "px")
print(
  ggplot(data = Y) +
    geom_line(aes(doy, count), color = "#e56d25") +
    facet_wrap(~ Yr, ncol = 1, scales = "free_y") +
    scale_y_continuous(labels = format_si()) +
    scale_x_date(labels = date_format("%b"), breaks = month_breaks) +
    xlab("Date") +
    ylab("Tweets per Day") +
    ggtitle(plot_title) +
    dark_panel +
    theme(
      # panel.spacing is the current name of the deprecated panel.margin
      panel.spacing = grid::unit(1.5, "line"),
      strip.text.x = element_text(size = 12, face = "bold")
    )
)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment