Skip to content

Instantly share code, notes, and snippets.

@bjurban
Created June 19, 2015 17:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bjurban/b4b7c497e43ad44f6066 to your computer and use it in GitHub Desktop.
Save bjurban/b4b7c497e43ad44f6066 to your computer and use it in GitHub Desktop.
# weather-underground.R
# Bryan Urban
# 2015-06-19
# this gets local weather data for a list of weather station codes from
# www.weatherunderground.com
#
# note: for large timespans or multiple cities, data scraping can be slow
## SETUP -----
# install.packages("weatherData")
# install.packages("lubridate")
# install.packages("data.table")
library(weatherData) # for scraping the data from the web
library(lubridate) # for timestamp data cleanup
library(data.table) # for easier data analysis
#
# weather underground helper functions
#
get_weather <- function(id, date_start, date_end){
require(weatherData)
dates <-
seq.Date(as.Date(date_start),
as.Date(date_end),
by="days")
w <- mapply(getDetailedWeather, date=dates,
MoreArgs=list(station_id = id, opt_all_columns=TRUE),
USE.NAMES=FALSE)
r <- data.frame()
for(i in 1:dim(w)[2]){
r <- rbind(r, data.frame(w[,i]))
}
# names(r) <- tolower(names(r))
# dput(names(r))
names(r) <- c("time", "timedt", "temperaturef", "dew_pointf", "humidity",
"sea_level_pressurein", "visibilitymi", "wind_direction",
"wind_speedmph", "gust_speedmph", "precipitationin", "events",
"conditions", "winddirdegrees", "dateutc")
r$ids <- id
r
}
get_weather_multi <- function(ids, loc=NA, date_start, date_end){
if(length(ids) != length(loc)) { loc= rep(NA, length(ids)) }
r <- data.frame()
for (i in seq_along(ids)){
w <- get_weather(ids[i], date_start, date_end)
w$loc <- loc[i]
r <- rbind(r, w)
}
r
}
clean_weather <- function(wd) {
wdt <- data.table(wd)
wdt[, `:=`(time = ymd_hms(time),
temperaturef = as.numeric(temperaturef),
temperaturec = (temperaturef - 32)*5/9,
dew_pointf = as.numeric(dew_pointf),
dew_pointc = (dew_pointf - 32)*5/9,
precipitationin = as.numeric(precipitationin),
wind_direction = as.character(wind_direction),
wind_speedmph = as.numeric(wind_speedmph),
gust_speedmph = as.numeric(gust_speedmph),
conditions = as.numeric(conditions))]
my.colorder <- c("time", "dateutc", "timedt",
"temperaturef", "temperaturec", "dew_pointf", "dew_pointc",
"humidity",
"sea_level_pressurein", "visibilitymi",
"wind_direction", "wind_speedmph", "gust_speedmph",
"precipitationin", "events", "conditions", "winddirdegrees",
"ids", "loc")
setcolorder(wdt, my.colorder)
wdt
}
hourly_weather <- function(wdt) {
wdt <-
wdt[, list(temperaturef = mean(.SD$temperaturef, na.rm=TRUE),
temperaturec = mean(.SD$temperaturec, na.rm=TRUE),
dew_pointf = mean(.SD$dew_pointf, na.rm=TRUE),
dew_pointc = mean(.SD$dew_pointc, na.rm=TRUE),
humidity = mean(.SD$humidity, na.rm=TRUE),
sea_level_pressurein = mean(.SD$sea_level_pressurein, na.rm=TRUE),
visibilitymi = mean(.SD$visibilitymi, na.rm=TRUE),
wind_direction = as.character(median(.SD$wind_direction, na.rm=TRUE)),
wind_speedmph = mean(.SD$wind_speedmph, na.rm=TRUE),
gust_speedmph = max(.SD$gust_speedmph, na.rm=TRUE),
precipitationin = sum(.SD$precipitationin, na.rm=TRUE),
events = as.character(median(.SD$events), na.rm=TRUE),
conditions = median(.SD$conditions, na.rm=TRUE),
winddirdegrees = mean(.SD$winddirdegrees, na.rm=TRUE)
), .(floor_date(time, "hour"), ids, loc)]
setnames(wdt, "floor_date", "ts")
wdt
}
## EXAMPLES -----
#
# get one week's weather data for one city: "KBOS" Boston
#
wbos <- get_weather("KBOS", "2015-05-01", "2015-05-07")
head(wbos)
#
# get one week's weather data for several cities
#
ids <- c("KBOS", "KELP", "KDCA", "KBWI", "KIAD") # airport codes prefixed with K
loc <- c("MA", "TX", "DC", "DC", "DC") # optional location labels
wd <- get_weather_multi(ids, loc, "2015-05-01", "2015-05-07")
#
# write minimally processed data to .csv file and then read back from .csv file
#
# fn <- "D:/data/weather-underground.csv"
# write.csv(wd, fn, row.names=FALSE)
# wd <- read.csv(fn, stringsAsFactors=FALSE)
#
# load raw data, clean up, and convert to data.table
#
wdt <- clean_weather(wd) # clean data, calculate temperatures in celcius, etc.
hw <- hourly_weather(wdt) # coerce to hourly data if desired
#
# plot results
#
library(lattice) # for plotting data
xyplot(temperaturec~time|loc, groups=ids, wdt, type='l')
xyplot(humidity~time|loc, groups=ids, wdt, type='l')
xyplot(dew_pointf~time|loc, groups=ids, wdt, type='l')
my.colors <- colorRampPalette(c("green", "yellow", "red"))
levelplot(temperaturef~
floor_date(time, "day")*
hour(time) | paste(ids, loc, sep=", "),
wall,
col.regions=my.colors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment