Created
June 19, 2015 17:31
-
-
Save bjurban/b4b7c497e43ad44f6066 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# weather-underground.R | |
# Bryan Urban | |
# 2015-06-19 | |
# this gets local weather data for a list of weather station codes from | |
# www.weatherunderground.com | |
# | |
# note: for large timespans or multiple cities, data scraping can be slow | |
## SETUP ----- | |
# install.packages("weatherData") | |
# install.packages("lubridate") | |
# install.packages("data.table") | |
library(weatherData) # for scraping the data from the web | |
library(lubridate) # for timestamp data cleanup | |
library(data.table) # for easier data analysis | |
# | |
# weather underground helper functions | |
# | |
get_weather <- function(id, date_start, date_end){ | |
require(weatherData) | |
dates <- | |
seq.Date(as.Date(date_start), | |
as.Date(date_end), | |
by="days") | |
w <- mapply(getDetailedWeather, date=dates, | |
MoreArgs=list(station_id = id, opt_all_columns=TRUE), | |
USE.NAMES=FALSE) | |
r <- data.frame() | |
for(i in 1:dim(w)[2]){ | |
r <- rbind(r, data.frame(w[,i])) | |
} | |
# names(r) <- tolower(names(r)) | |
# dput(names(r)) | |
names(r) <- c("time", "timedt", "temperaturef", "dew_pointf", "humidity", | |
"sea_level_pressurein", "visibilitymi", "wind_direction", | |
"wind_speedmph", "gust_speedmph", "precipitationin", "events", | |
"conditions", "winddirdegrees", "dateutc") | |
r$ids <- id | |
r | |
} | |
get_weather_multi <- function(ids, loc=NA, date_start, date_end){ | |
if(length(ids) != length(loc)) { loc= rep(NA, length(ids)) } | |
r <- data.frame() | |
for (i in seq_along(ids)){ | |
w <- get_weather(ids[i], date_start, date_end) | |
w$loc <- loc[i] | |
r <- rbind(r, w) | |
} | |
r | |
} | |
clean_weather <- function(wd) { | |
wdt <- data.table(wd) | |
wdt[, `:=`(time = ymd_hms(time), | |
temperaturef = as.numeric(temperaturef), | |
temperaturec = (temperaturef - 32)*5/9, | |
dew_pointf = as.numeric(dew_pointf), | |
dew_pointc = (dew_pointf - 32)*5/9, | |
precipitationin = as.numeric(precipitationin), | |
wind_direction = as.character(wind_direction), | |
wind_speedmph = as.numeric(wind_speedmph), | |
gust_speedmph = as.numeric(gust_speedmph), | |
conditions = as.numeric(conditions))] | |
my.colorder <- c("time", "dateutc", "timedt", | |
"temperaturef", "temperaturec", "dew_pointf", "dew_pointc", | |
"humidity", | |
"sea_level_pressurein", "visibilitymi", | |
"wind_direction", "wind_speedmph", "gust_speedmph", | |
"precipitationin", "events", "conditions", "winddirdegrees", | |
"ids", "loc") | |
setcolorder(wdt, my.colorder) | |
wdt | |
} | |
hourly_weather <- function(wdt) { | |
wdt <- | |
wdt[, list(temperaturef = mean(.SD$temperaturef, na.rm=TRUE), | |
temperaturec = mean(.SD$temperaturec, na.rm=TRUE), | |
dew_pointf = mean(.SD$dew_pointf, na.rm=TRUE), | |
dew_pointc = mean(.SD$dew_pointc, na.rm=TRUE), | |
humidity = mean(.SD$humidity, na.rm=TRUE), | |
sea_level_pressurein = mean(.SD$sea_level_pressurein, na.rm=TRUE), | |
visibilitymi = mean(.SD$visibilitymi, na.rm=TRUE), | |
wind_direction = as.character(median(.SD$wind_direction, na.rm=TRUE)), | |
wind_speedmph = mean(.SD$wind_speedmph, na.rm=TRUE), | |
gust_speedmph = max(.SD$gust_speedmph, na.rm=TRUE), | |
precipitationin = sum(.SD$precipitationin, na.rm=TRUE), | |
events = as.character(median(.SD$events), na.rm=TRUE), | |
conditions = median(.SD$conditions, na.rm=TRUE), | |
winddirdegrees = mean(.SD$winddirdegrees, na.rm=TRUE) | |
), .(floor_date(time, "hour"), ids, loc)] | |
setnames(wdt, "floor_date", "ts") | |
wdt | |
} | |
## EXAMPLES ----- | |
# | |
# get one week's weather data for one city: "KBOS" Boston | |
# | |
wbos <- get_weather("KBOS", "2015-05-01", "2015-05-07") | |
head(wbos) | |
# | |
# get one week's weather data for several cities | |
# | |
ids <- c("KBOS", "KELP", "KDCA", "KBWI", "KIAD") # airport codes prefixed with K | |
loc <- c("MA", "TX", "DC", "DC", "DC") # optional location labels | |
wd <- get_weather_multi(ids, loc, "2015-05-01", "2015-05-07") | |
# | |
# write minimally processed data to .csv file and then read back from .csv file | |
# | |
# fn <- "D:/data/weather-underground.csv" | |
# write.csv(wd, fn, row.names=FALSE) | |
# wd <- read.csv(fn, stringsAsFactors=FALSE) | |
# | |
# load raw data, clean up, and convert to data.table | |
# | |
wdt <- clean_weather(wd) # clean data, calculate temperatures in celcius, etc. | |
hw <- hourly_weather(wdt) # coerce to hourly data if desired | |
# | |
# plot results | |
# | |
library(lattice) # for plotting data | |
xyplot(temperaturec~time|loc, groups=ids, wdt, type='l') | |
xyplot(humidity~time|loc, groups=ids, wdt, type='l') | |
xyplot(dew_pointf~time|loc, groups=ids, wdt, type='l') | |
my.colors <- colorRampPalette(c("green", "yellow", "red")) | |
levelplot(temperaturef~ | |
floor_date(time, "day")* | |
hour(time) | paste(ids, loc, sep=", "), | |
wall, | |
col.regions=my.colors) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment