Skip to content

Instantly share code, notes, and snippets.

@abresler
Last active August 29, 2015 14:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abresler/424b89ca4ad421366623 to your computer and use it in GitHub Desktop.
Save abresler/424b89ca4ad421366623 to your computer and use it in GitHub Desktop.
panzoom with weather
#http://rpubs.com/bradleyboehmke/weather_graphic
# Preprocessing & summarizing data
library(dplyr)
library(tidyr)
library(rvest)
# Visualizatin development
library(ggplot2)
devtools::install_github("timelyportfolio/svgPanZoom")
library(svgPanZoom) # see install step above
library(SVGAnnotation)
"http://academic.udayton.edu/kissock/http/Weather/gsod95-current/NYNEWYOR.txt" %>%
read.table() %>% data.frame %>% tbl_df -> DAY
names(DAY) <- c("Month", "Day", "Year", "Temp")
DAY %>%
group_by(Year, Month) %>%
arrange(Day) %>%
ungroup() %>%
group_by(Year) %>%
mutate(newDay = seq(1, length(Day))) %>% # label days as 1:365 (will represent x-axis)
ungroup() %>%
filter(Temp != -99 & Year != 2014) %>% # filter out missing data (identified with '-99' value) & current year data
group_by(newDay) %>%
mutate(upper = max(Temp), # identify max value for each day
lower = min(Temp), # identify min value for each day
avg = mean(Temp), # calculate mean value for each day
se = sd(Temp)/sqrt(length(Temp))) %>% # calculate standard error of mean
mutate(avg_upper = avg+(2.101*se), # calculate 95% CI for mean
avg_lower = avg-(2.101*se)) %>% # calculate 95% CI for mean
ungroup() -> Past
# create dataframe that represents current year data
DAY %>%
group_by(Year, Month) %>%
arrange(Day) %>%
ungroup() %>%
group_by(Year) %>%
mutate(newDay = seq(1, length(Day))) %>% # create matching x-axis as historical data
ungroup() %>%
filter(Temp != -99 & Year == 2014) -> Present # filter out missing data & select current year data
# create dataframe that represents the lowest temp for each day for the historical data
PastLows <- Past %>%
group_by(newDay) %>%
summarise(Pastlow = min(Temp)) # identify lowest temp for each day from 1975-2013
# create dataframe that identifies the days in 2014 in which the temps were lower than all previous 19 years
PresentLows <- Present %>%
left_join(PastLows) %>% # merge historical lows to current year low data
mutate(record = ifelse(Temp<Pastlow, "Y", "N")) %>% # identifies if current year was record low
filter(record == "Y") # filter for days that represent current year record lows
# create dataframe that represents the highest temp for each day for the historical data
PastHighs <- Past %>%
group_by(newDay) %>%
summarise(Pasthigh = max(Temp)) # identify highest temp for each day from 1975-2013
# create dataframe that identifies the days in 2014 in which the temps were higher than all previous 19 years
PresentHighs <- Present %>%
left_join(PastHighs) %>% # merge historical highs to current year low data
mutate(record = ifelse(Temp>Pasthigh, "Y", "N")) %>% # identifies if current year was record high
filter(record == "Y") # filter for days that represent current year record highs
dgr_fmt <- function(x, ...) {
parse(text = paste(x, "*degree", sep = ""))
}
# create y-axis variable
a <- dgr_fmt(seq(-10,100, by=10))
# create a small dataframe to represent legend symbol for 2014 Temperature
data <- data.frame(x=seq(175,182),y=rnorm(8,15,2))
p <- ggplot(Past, aes(newDay, Temp)) +
theme(plot.background = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
#axis.text = element_blank(),
axis.title = element_blank()) +
geom_linerange(Past, mapping=aes(x=newDay, ymin=lower, ymax=upper), colour = "wheat2", alpha=.1)
#p
#Next, we can add the data that represents the 95% confidence interval around the daily mean temperatures for 1975-2013.
p <- p +
geom_linerange(Past, mapping=aes(x=newDay, ymin=avg_lower, ymax=avg_upper), colour = "wheat4")
#p
#Here, we can incorporate the current year temperature data. This is also the step in which I incorporate the y-axis border. As you can see in Tufte’s original, the y-axis border appears as dashes; however, in reality it is a solid line that has the y-axis gridlines laying over top of it which creates the dashed effect at the tickmarks of interest
p <- p +
geom_line(Present, mapping=aes(x=newDay, y=Temp, group=1)) +
geom_vline(xintercept = 0, colour = "wheat3", linetype=1, size=1)
#p
#Now it’s time to add the x-axis gridlines. These gridlines are very discreet and are meant to only provide reference points when necessary. The only place the viewer needs to reference the degree relationship is within the “band” of data; otherwise, we want the gridlines to blend into the background to keep the ink ratio low. Another purpose of these gridlines is to create the dashed effect on the custom y-axis gridline.
p <- p + geom_hline(yintercept = -10, colour = "white", linetype=1) +
geom_hline(yintercept = 0, colour = "white", linetype=1) +
geom_hline(yintercept = 10, colour = "white", linetype=1) +
geom_hline(yintercept = 20, colour = "white", linetype=1) +
geom_hline(yintercept = 30, colour = "white", linetype=1) +
geom_hline(yintercept = 40, colour = "white", linetype=1) +
geom_hline(yintercept = 50, colour = "white", linetype=1) +
geom_hline(yintercept = 60, colour = "white", linetype=1) +
geom_hline(yintercept = 70, colour = "white", linetype=1) +
geom_hline(yintercept = 80, colour = "white", linetype=1) +
geom_hline(yintercept = 90, colour = "white", linetype=1) +
geom_hline(yintercept = 100, colour = "white", linetype=1)
#p
#Now we will start to add the x-axis gridlines. We add the dotted gridlines to the last day of each month.
p <- p +
geom_vline(xintercept = 31, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 59, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 90, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 120, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 151, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 181, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 212, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 243, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 273, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 304, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 334, colour = "wheat3", linetype=3, size=.5) +
geom_vline(xintercept = 365, colour = "wheat3", linetype=3, size=.5)
#Step 6
#Now it’s time to dress up the axis labels. First, I limit the y-axis to a range of [-20°, 100°]. I then force the breaks to line up with the custom y-axis gridlines at even degrees in multiples of 10. I assign the labels to the degree formatted variable (“a”) that I created earlier to display the degree symbol. For the x-axis, I removed the spacing (it’s hard to see but there is padded space added to the original x-axis) at the edges of the x-axis, identified the breaks to place labels, and then provided the month names as the labels.
#Don’t be fooled, there was no magical approach to identifying the best breaks for the x-axis. I started with the day that represented the middle of each month and then moved them around as required to get the month names to appear centered.
p <- p +
coord_cartesian(ylim = c(-10,100)) +
scale_y_continuous(breaks = seq(-10,100, by=10), labels = a) +
scale_x_continuous(expand = c(0, 0),
breaks = c(15,45,75,105,135,165,195,228,258,288,320,350),
labels = c("January", "February", "March", "April",
"May", "June", "July", "August", "September",
"October", "November", "December"))
#Step 7
#At this point we have the basic underlying graphic that is similar to Tufte’s temperature plot. Now it’s time to add in the extra comparisons that I wanted to look at; this includes adding in the points that identify the days in which the current year (2014) had the record high and low temperature.
p <- p +
geom_point(data=PresentLows, aes(x=newDay, y=Temp), colour="blue3") +
geom_point(data=PresentHighs, aes(x=newDay, y=Temp), colour="firebrick3")
p <- p +
ggtitle("New York City's Weather in 2014") +
theme(plot.title=element_text(face="bold",hjust=.012,vjust=.8,colour="#3C3C3C",size=20)) +
annotate("text", x = 35, y = 98, label = "Temperature in Fahrenheit", size=4, fontface="bold")
p +
annotate("text", x = 63, y = 93,
label = "Data represents average daily temperatures. Accessible data dates back to", size=3, colour="gray30") +
annotate("text", x = 59.5, y = 90,
label = "January 1, 1975. Data for 2014 is only available through December 16.", size=3, colour="gray30") +
annotate("text", x = 61, y = 87,
label = "Average temperature for the year was 54.8° making 2014 the 6th coldest", size=3, colour="gray30") +
annotate("text", x = 61, y = 84, label = "year since 1995", size=3, colour="gray30") -> p
p +
annotate("segment", x = 22, xend = 32, y = 12, yend = 3, colour = "blue3") +
annotate("text", x = 50, y = 2, label = "We had 25 days that were the", size=3, colour="blue3") +
annotate("text", x = 50, y = 0, label = "coldest since 1995", size=3, colour="blue3") +
annotate("segment", x = 169, xend = 169, y = 83, yend = 92, colour = "firebrick3") +
annotate("text", x = 169, y = 95, label = "We had 4 days that were the", size=3, colour="firebrick3") +
annotate("text", x = 169, y = 93, label = "hottest since 1995", size=3, colour="firebrick3") -> p
Present %>% filter(newDay %in% c(180:185)) %>% select(x = newDay, y = Temp) %>% data.frame -> legend_data
legend_data$y - 65 -> legend_data$y
p +
annotate("segment", x = 182, xend = 182, y = 5, yend = 25, colour = "wheat2", size=3) +
annotate("segment", x = 182, xend = 182, y = 12, yend = 18, colour = "wheat4", size=3) +
geom_line(data=legend_data, aes(x=x,y=y)) +
annotate("segment", x = 184, xend = 186, y = 17.7, yend = 17.7, colour = "wheat4", size=.5) +
annotate("segment", x = 184, xend = 186, y = 12.2, yend = 12.2, colour = "wheat4", size=.5) +
annotate("segment", x = 185, xend = 185, y = 12.2, yend = 17.7, colour = "wheat4", size=.5) +
annotate("text", x = 196, y = 14.75, label = "NORMAL RANGE", size=2, colour="gray30") +
annotate("text", x = 165, y = 14.75, label = "2014 TEMPERATURE", size=2, colour="gray30") +
annotate("text", x = 193, y = 25, label = "RECORD HIGH", size=2, colour="gray30") +
annotate("text", x = 193, y = 5, label = "RECORD LOW", size=2, colour="gray30") -> p
options(viewer = NULL)
svgPanZoom(
svgPlot(
show(p),addInfo=T,height=10,width=20
)
,controlIconsEnabled=T
)
@timelyportfolio
Copy link

svg output will be uglier, but try this instead

svgPanZoom(
  svgPlot(
    show(p),addInfo=F,height=12,width=20
  )
 ,controlIconsEnabled=T
)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment