Last active
August 29, 2015 14:16
-
-
Save mkiang/6958fc2d65aac68523d0 to your computer and use it in GitHub Desktop.
Boston Snowfall
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Side project name: Boston winters for the last ten years | |
## Author: M Kiang | |
## Data: http://www.ncdc.noaa.gov/snow-and-ice/daily-snow/ | |
library(ggplot2) | |
library(dplyr) | |
library(tidyr) | |
# library(scales) | |
# library(gridExtra) | |
# library(directlabels) | |
## Helper cumulative sum function with missing data handling | |
cumsumNA <- function(x, insertNAs = TRUE) { | |
csx <- cumsum(ifelse(is.na(x), 0, x)) | |
if (insertNAs == TRUE) { | |
csx <- csx + (x * 0) | |
} | |
return(csx) | |
} | |
## Helper function to scrape the data from NOAA. Returns a vector of snowfall. | |
getSnow <- function(year, month = "01", days = 31, | |
cpattern = "MA BOSTON", printurl = FALSE) { | |
# NOTE: Putting in MORE days is fine. Will result in NA. | |
# Widths for the columns (file is fixed-width) | |
day_splits <- c(6, 8, 7, 12, 33, 18, 14, rep(11, days)) | |
# Gather up the URL stuff | |
base_url <- "http://www1.ncdc.noaa.gov/pub/data/snowmonitoring/fema/" | |
tail_url <- "-dlysnfl.txt" | |
file_url <- paste0(base_url, month, "-", year, tail_url) | |
# Debugging | |
if (printurl == TRUE) { | |
print(file_url) | |
} | |
# Get the file | |
con <- url(file_url) | |
x <- readLines(con) | |
close(con) | |
# Find city we need | |
city_ix <- grep(x, pattern = cpattern) | |
# Not all months have all cities so check grep | |
if (length(city_ix == 1)) { | |
small_x <- read.fwf(textConnection(x[city_ix]), width = day_splits) | |
y <- as.numeric(small_x[8 : ncol(small_x)]) | |
} else { | |
# If city doesn't exist, just rep NA to keep year/month pattern. | |
y <- rep(NA, days) | |
} | |
return(y) | |
} | |
## NOAA data starts 7/2005. | |
## Winter is Dec, Jan, Feb according to NWS | |
## http://w1.weather.gov/glossary/index.php?letter=w | |
## Create explicit month and year vectors to avoid double for loops | |
mos <- c(12, rep(c(1, 2, 12), 9), 1:2) | |
yrs <- c(2005, rep(2006:2014, each = 3), rep(2015, 2)) | |
## shift jan/feb back to group winters across years | |
wtr <- ifelse(mos == 12, yrs, yrs - 1) | |
## Pull the data | |
snowholder <- NULL | |
for (i in seq_along(mos)) { | |
tempsnow <- getSnow(yrs[i], | |
month = sprintf(mos[i], fmt = "%02d"), | |
printurl = FALSE) | |
snowholder <- c(snowholder, tempsnow) | |
} | |
## We extracted 31 days for every month | |
snowdf <- data.frame(year = rep(yrs, each = 31), | |
month = rep(mos, each = 31), | |
winter = rep(wtr, each = 31), | |
day = 1:31, | |
daily = snowholder) | |
## Remove excess days from months with fewer than 31 days. Add real date. | |
snowdf <- snowdf[!is.na(snowdf$daily), ] | |
snowdf$date <- with(data = snowdf, ISOdate(year, month, day)) | |
## NOAA uses -9999 as missing. Convert to NA | |
snowdf$daily[snowdf$daily < 0] <- NA | |
## Make date labels starting with "00 01" for Dec 1st | |
snowdf$wday <- format(snowdf$date, "%m %d") | |
snowdf$wday <- gsub(snowdf$wday, fixed = TRUE, | |
pattern = "12 ", replacement = "00 ") | |
## Cumulative snowfall | |
cumsnow <- NULL | |
for (winter in unique(snowdf$winter)) { | |
temp <- cumsumNA(snowdf$daily[snowdf$winter == winter], insertNAs = FALSE) | |
cumsnow <- c(cumsnow, temp) | |
} | |
snowdf$cumulative <- cumsnow | |
## Line aesthetics | |
snowdf$lsize <- snowdf$lalpha <- snowdf$lcolor <- 0 | |
snowdf$lsize[snowdf$winter == 2014] <- 1 | |
snowdf$lalpha[snowdf$winter == 2014] <- 1 | |
snowdf$lcolor[snowdf$winter == 2014] <- 1 | |
## Reshape from wide to long | |
longdf <- snowdf %>% gather(type, inches, daily, cumulative) | |
## More descriptive facet names | |
levels(longdf$type) <- c("Daily snowfall", "Cumulative snowfall") | |
## Plot it | |
p1 <- ggplot(data = longdf, aes(x = wday, y = inches, | |
group = factor(winter))) + | |
geom_line(aes(size = lsize, alpha = lalpha, color = lcolor)) + | |
theme_classic() + facet_wrap(~ type, ncol = 1, scales = "free_y") + | |
scale_x_discrete(breaks = c("00 01", "00 15", "01 01", "01 15", | |
"02 01", "02 15", "02 29"), | |
labels = c("Dec 1", "Dec 15", "Jan 1", "Jan 15", | |
"Feb 1", "Feb 15", "Mar 1")) + | |
scale_y_continuous(expand = c(.03, 0)) + | |
theme(legend.position = "none", | |
strip.background = element_blank()) + | |
labs(y = "Inches", x = "", | |
title = "Boston winters: This season vs previous nine") + | |
scale_size(range = c(.5, 1.25)) + scale_alpha(range = c(.3, .9)) | |
ggsave(plot = p1, file = "cumsnowfall.pdf", scale = 1.5, | |
width = 6, height = 3.5) | |
ggsave(plot = p1, file = "snowfallweb.png", scale = 1.5, | |
width = 6, height = 3.5) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment