Skip to content

Instantly share code, notes, and snippets.

@mkiang
Last active August 29, 2015 14:16
Show Gist options
  • Save mkiang/6958fc2d65aac68523d0 to your computer and use it in GitHub Desktop.
Save mkiang/6958fc2d65aac68523d0 to your computer and use it in GitHub Desktop.
Boston Snowfall
## Side project name: Boston winters for the last ten years
## Author: M Kiang
## Data: http://www.ncdc.noaa.gov/snow-and-ice/daily-snow/
library(ggplot2)
library(dplyr)
library(tidyr)
# library(scales)
# library(gridExtra)
# library(directlabels)
## Helper cumulative sum function with missing data handling
cumsumNA <- function(x, insertNAs = TRUE) {
csx <- cumsum(ifelse(is.na(x), 0, x))
if (insertNAs == TRUE) {
csx <- csx + (x * 0)
}
return(csx)
}
## Helper function to scrape the data from NOAA. Returns a vector of snowfall.
getSnow <- function(year, month = "01", days = 31,
cpattern = "MA BOSTON", printurl = FALSE) {
# NOTE: Putting in MORE days is fine. Will result in NA.
# Widths for the columns (file is fixed-width)
day_splits <- c(6, 8, 7, 12, 33, 18, 14, rep(11, days))
# Gather up the URL stuff
base_url <- "http://www1.ncdc.noaa.gov/pub/data/snowmonitoring/fema/"
tail_url <- "-dlysnfl.txt"
file_url <- paste0(base_url, month, "-", year, tail_url)
# Debugging
if (printurl == TRUE) {
print(file_url)
}
# Get the file
con <- url(file_url)
x <- readLines(con)
close(con)
# Find city we need
city_ix <- grep(x, pattern = cpattern)
# Not all months have all cities so check grep
if (length(city_ix == 1)) {
small_x <- read.fwf(textConnection(x[city_ix]), width = day_splits)
y <- as.numeric(small_x[8 : ncol(small_x)])
} else {
# If city doesn't exist, just rep NA to keep year/month pattern.
y <- rep(NA, days)
}
return(y)
}
## NOAA data starts 7/2005.
## Winter is Dec, Jan, Feb according to NWS
## http://w1.weather.gov/glossary/index.php?letter=w
## Create explicit month and year vectors to avoid double for loops
mos <- c(12, rep(c(1, 2, 12), 9), 1:2)
yrs <- c(2005, rep(2006:2014, each = 3), rep(2015, 2))
## shift jan/feb back to group winters across years
wtr <- ifelse(mos == 12, yrs, yrs - 1)
## Pull the data
snowholder <- NULL
for (i in seq_along(mos)) {
tempsnow <- getSnow(yrs[i],
month = sprintf(mos[i], fmt = "%02d"),
printurl = FALSE)
snowholder <- c(snowholder, tempsnow)
}
## We extracted 31 days for every month
snowdf <- data.frame(year = rep(yrs, each = 31),
month = rep(mos, each = 31),
winter = rep(wtr, each = 31),
day = 1:31,
daily = snowholder)
## Remove excess days from months with fewer than 31 days. Add real date.
snowdf <- snowdf[!is.na(snowdf$daily), ]
snowdf$date <- with(data = snowdf, ISOdate(year, month, day))
## NOAA uses -9999 as missing. Convert to NA
snowdf$daily[snowdf$daily < 0] <- NA
## Make date labels starting with "00 01" for Dec 1st
snowdf$wday <- format(snowdf$date, "%m %d")
snowdf$wday <- gsub(snowdf$wday, fixed = TRUE,
pattern = "12 ", replacement = "00 ")
## Cumulative snowfall
cumsnow <- NULL
for (winter in unique(snowdf$winter)) {
temp <- cumsumNA(snowdf$daily[snowdf$winter == winter], insertNAs = FALSE)
cumsnow <- c(cumsnow, temp)
}
snowdf$cumulative <- cumsnow
## Line aesthetics
snowdf$lsize <- snowdf$lalpha <- snowdf$lcolor <- 0
snowdf$lsize[snowdf$winter == 2014] <- 1
snowdf$lalpha[snowdf$winter == 2014] <- 1
snowdf$lcolor[snowdf$winter == 2014] <- 1
## Reshape from wide to long
longdf <- snowdf %>% gather(type, inches, daily, cumulative)
## More descriptive facet names
levels(longdf$type) <- c("Daily snowfall", "Cumulative snowfall")
## Plot it
p1 <- ggplot(data = longdf, aes(x = wday, y = inches,
group = factor(winter))) +
geom_line(aes(size = lsize, alpha = lalpha, color = lcolor)) +
theme_classic() + facet_wrap(~ type, ncol = 1, scales = "free_y") +
scale_x_discrete(breaks = c("00 01", "00 15", "01 01", "01 15",
"02 01", "02 15", "02 29"),
labels = c("Dec 1", "Dec 15", "Jan 1", "Jan 15",
"Feb 1", "Feb 15", "Mar 1")) +
scale_y_continuous(expand = c(.03, 0)) +
theme(legend.position = "none",
strip.background = element_blank()) +
labs(y = "Inches", x = "",
title = "Boston winters: This season vs previous nine") +
scale_size(range = c(.5, 1.25)) + scale_alpha(range = c(.3, .9))
ggsave(plot = p1, file = "cumsnowfall.pdf", scale = 1.5,
width = 6, height = 3.5)
ggsave(plot = p1, file = "snowfallweb.png", scale = 1.5,
width = 6, height = 3.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment