-
-
Save abresler/424b89ca4ad421366623 to your computer and use it in GitHub Desktop.
panzoom with weather
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#http://rpubs.com/bradleyboehmke/weather_graphic | |
# Preprocessing & summarizing data | |
library(dplyr) | |
library(tidyr) | |
library(rvest) | |
# Visualizatin development | |
library(ggplot2) | |
devtools::install_github("timelyportfolio/svgPanZoom") | |
library(svgPanZoom) # see install step above | |
library(SVGAnnotation) | |
"http://academic.udayton.edu/kissock/http/Weather/gsod95-current/NYNEWYOR.txt" %>% | |
read.table() %>% data.frame %>% tbl_df -> DAY | |
names(DAY) <- c("Month", "Day", "Year", "Temp") | |
DAY %>% | |
group_by(Year, Month) %>% | |
arrange(Day) %>% | |
ungroup() %>% | |
group_by(Year) %>% | |
mutate(newDay = seq(1, length(Day))) %>% # label days as 1:365 (will represent x-axis) | |
ungroup() %>% | |
filter(Temp != -99 & Year != 2014) %>% # filter out missing data (identified with '-99' value) & current year data | |
group_by(newDay) %>% | |
mutate(upper = max(Temp), # identify max value for each day | |
lower = min(Temp), # identify min value for each day | |
avg = mean(Temp), # calculate mean value for each day | |
se = sd(Temp)/sqrt(length(Temp))) %>% # calculate standard error of mean | |
mutate(avg_upper = avg+(2.101*se), # calculate 95% CI for mean | |
avg_lower = avg-(2.101*se)) %>% # calculate 95% CI for mean | |
ungroup() -> Past | |
# create dataframe that represents current year data | |
DAY %>% | |
group_by(Year, Month) %>% | |
arrange(Day) %>% | |
ungroup() %>% | |
group_by(Year) %>% | |
mutate(newDay = seq(1, length(Day))) %>% # create matching x-axis as historical data | |
ungroup() %>% | |
filter(Temp != -99 & Year == 2014) -> Present # filter out missing data & select current year data | |
# create dataframe that represents the lowest temp for each day for the historical data | |
PastLows <- Past %>% | |
group_by(newDay) %>% | |
summarise(Pastlow = min(Temp)) # identify lowest temp for each day from 1975-2013 | |
# create dataframe that identifies the days in 2014 in which the temps were lower than all previous 19 years | |
PresentLows <- Present %>% | |
left_join(PastLows) %>% # merge historical lows to current year low data | |
mutate(record = ifelse(Temp<Pastlow, "Y", "N")) %>% # identifies if current year was record low | |
filter(record == "Y") # filter for days that represent current year record lows | |
# create dataframe that represents the highest temp for each day for the historical data | |
PastHighs <- Past %>% | |
group_by(newDay) %>% | |
summarise(Pasthigh = max(Temp)) # identify highest temp for each day from 1975-2013 | |
# create dataframe that identifies the days in 2014 in which the temps were higher than all previous 19 years | |
PresentHighs <- Present %>% | |
left_join(PastHighs) %>% # merge historical highs to current year low data | |
mutate(record = ifelse(Temp>Pasthigh, "Y", "N")) %>% # identifies if current year was record high | |
filter(record == "Y") # filter for days that represent current year record highs | |
dgr_fmt <- function(x, ...) { | |
parse(text = paste(x, "*degree", sep = "")) | |
} | |
# create y-axis variable | |
a <- dgr_fmt(seq(-10,100, by=10)) | |
# create a small dataframe to represent legend symbol for 2014 Temperature | |
data <- data.frame(x=seq(175,182),y=rnorm(8,15,2)) | |
p <- ggplot(Past, aes(newDay, Temp)) + | |
theme(plot.background = element_blank(), | |
panel.grid.minor = element_blank(), | |
panel.grid.major = element_blank(), | |
panel.border = element_blank(), | |
panel.background = element_blank(), | |
axis.ticks = element_blank(), | |
#axis.text = element_blank(), | |
axis.title = element_blank()) + | |
geom_linerange(Past, mapping=aes(x=newDay, ymin=lower, ymax=upper), colour = "wheat2", alpha=.1) | |
#p | |
#Next, we can add the data that represents the 95% confidence interval around the daily mean temperatures for 1975-2013. | |
p <- p + | |
geom_linerange(Past, mapping=aes(x=newDay, ymin=avg_lower, ymax=avg_upper), colour = "wheat4") | |
#p | |
#Here, we can incorporate the current year temperature data. This is also the step in which I incorporate the y-axis border. As you can see in Tufte’s original, the y-axis border appears as dashes; however, in reality it is a solid line that has the y-axis gridlines laying over top of it which creates the dashed effect at the tickmarks of interest | |
p <- p + | |
geom_line(Present, mapping=aes(x=newDay, y=Temp, group=1)) + | |
geom_vline(xintercept = 0, colour = "wheat3", linetype=1, size=1) | |
#p | |
#Now it’s time to add the x-axis gridlines. These gridlines are very discreet and are meant to only provide reference points when necessary. The only place the viewer needs to reference the degree relationship is within the “band” of data; otherwise, we want the gridlines to blend into the background to keep the ink ratio low. Another purpose of these gridlines is to create the dashed effect on the custom y-axis gridline. | |
p <- p + geom_hline(yintercept = -10, colour = "white", linetype=1) + | |
geom_hline(yintercept = 0, colour = "white", linetype=1) + | |
geom_hline(yintercept = 10, colour = "white", linetype=1) + | |
geom_hline(yintercept = 20, colour = "white", linetype=1) + | |
geom_hline(yintercept = 30, colour = "white", linetype=1) + | |
geom_hline(yintercept = 40, colour = "white", linetype=1) + | |
geom_hline(yintercept = 50, colour = "white", linetype=1) + | |
geom_hline(yintercept = 60, colour = "white", linetype=1) + | |
geom_hline(yintercept = 70, colour = "white", linetype=1) + | |
geom_hline(yintercept = 80, colour = "white", linetype=1) + | |
geom_hline(yintercept = 90, colour = "white", linetype=1) + | |
geom_hline(yintercept = 100, colour = "white", linetype=1) | |
#p | |
#Now we will start to add the x-axis gridlines. We add the dotted gridlines to the last day of each month. | |
p <- p + | |
geom_vline(xintercept = 31, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 59, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 90, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 120, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 151, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 181, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 212, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 243, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 273, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 304, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 334, colour = "wheat3", linetype=3, size=.5) + | |
geom_vline(xintercept = 365, colour = "wheat3", linetype=3, size=.5) | |
#Step 6 | |
#Now it’s time to dress up the axis labels. First, I limit the y-axis to a range of [-20°, 100°]. I then force the breaks to line up with the custom y-axis gridlines at even degrees in multiples of 10. I assign the labels to the degree formatted variable (“a”) that I created earlier to display the degree symbol. For the x-axis, I removed the spacing (it’s hard to see but there is padded space added to the original x-axis) at the edges of the x-axis, identified the breaks to place labels, and then provided the month names as the labels. | |
#Don’t be fooled, there was no magical approach to identifying the best breaks for the x-axis. I started with the day that represented the middle of each month and then moved them around as required to get the month names to appear centered. | |
p <- p + | |
coord_cartesian(ylim = c(-10,100)) + | |
scale_y_continuous(breaks = seq(-10,100, by=10), labels = a) + | |
scale_x_continuous(expand = c(0, 0), | |
breaks = c(15,45,75,105,135,165,195,228,258,288,320,350), | |
labels = c("January", "February", "March", "April", | |
"May", "June", "July", "August", "September", | |
"October", "November", "December")) | |
#Step 7 | |
#At this point we have the basic underlying graphic that is similar to Tufte’s temperature plot. Now it’s time to add in the extra comparisons that I wanted to look at; this includes adding in the points that identify the days in which the current year (2014) had the record high and low temperature. | |
p <- p + | |
geom_point(data=PresentLows, aes(x=newDay, y=Temp), colour="blue3") + | |
geom_point(data=PresentHighs, aes(x=newDay, y=Temp), colour="firebrick3") | |
p <- p + | |
ggtitle("New York City's Weather in 2014") + | |
theme(plot.title=element_text(face="bold",hjust=.012,vjust=.8,colour="#3C3C3C",size=20)) + | |
annotate("text", x = 35, y = 98, label = "Temperature in Fahrenheit", size=4, fontface="bold") | |
p + | |
annotate("text", x = 63, y = 93, | |
label = "Data represents average daily temperatures. Accessible data dates back to", size=3, colour="gray30") + | |
annotate("text", x = 59.5, y = 90, | |
label = "January 1, 1975. Data for 2014 is only available through December 16.", size=3, colour="gray30") + | |
annotate("text", x = 61, y = 87, | |
label = "Average temperature for the year was 54.8° making 2014 the 6th coldest", size=3, colour="gray30") + | |
annotate("text", x = 61, y = 84, label = "year since 1995", size=3, colour="gray30") -> p | |
p + | |
annotate("segment", x = 22, xend = 32, y = 12, yend = 3, colour = "blue3") + | |
annotate("text", x = 50, y = 2, label = "We had 25 days that were the", size=3, colour="blue3") + | |
annotate("text", x = 50, y = 0, label = "coldest since 1995", size=3, colour="blue3") + | |
annotate("segment", x = 169, xend = 169, y = 83, yend = 92, colour = "firebrick3") + | |
annotate("text", x = 169, y = 95, label = "We had 4 days that were the", size=3, colour="firebrick3") + | |
annotate("text", x = 169, y = 93, label = "hottest since 1995", size=3, colour="firebrick3") -> p | |
Present %>% filter(newDay %in% c(180:185)) %>% select(x = newDay, y = Temp) %>% data.frame -> legend_data | |
legend_data$y - 65 -> legend_data$y | |
p + | |
annotate("segment", x = 182, xend = 182, y = 5, yend = 25, colour = "wheat2", size=3) + | |
annotate("segment", x = 182, xend = 182, y = 12, yend = 18, colour = "wheat4", size=3) + | |
geom_line(data=legend_data, aes(x=x,y=y)) + | |
annotate("segment", x = 184, xend = 186, y = 17.7, yend = 17.7, colour = "wheat4", size=.5) + | |
annotate("segment", x = 184, xend = 186, y = 12.2, yend = 12.2, colour = "wheat4", size=.5) + | |
annotate("segment", x = 185, xend = 185, y = 12.2, yend = 17.7, colour = "wheat4", size=.5) + | |
annotate("text", x = 196, y = 14.75, label = "NORMAL RANGE", size=2, colour="gray30") + | |
annotate("text", x = 165, y = 14.75, label = "2014 TEMPERATURE", size=2, colour="gray30") + | |
annotate("text", x = 193, y = 25, label = "RECORD HIGH", size=2, colour="gray30") + | |
annotate("text", x = 193, y = 5, label = "RECORD LOW", size=2, colour="gray30") -> p | |
options(viewer = NULL) | |
svgPanZoom( | |
svgPlot( | |
show(p),addInfo=T,height=10,width=20 | |
) | |
,controlIconsEnabled=T | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
svg
output will be uglier, but try this instead