Skip to content

Instantly share code, notes, and snippets.

@seanjtaylor
Created January 24, 2015 17:16
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save seanjtaylor/8c67406e8881d373f180 to your computer and use it in GitHub Desktop.
Save seanjtaylor/8c67406e8881d373f180 to your computer and use it in GitHub Desktop.
library(mgcv)
library(ggplot2)
library(dplyr)
library(XML)
library(weatherData)
us.airports.url <- 'http://www.world-airport-codes.com/us-top-40-airports.html'
us.airports <- readHTMLTable(us.airports.url)[[1]] %>%
filter(!is.na(IATA)) %>%
select(City, IATA)
## Cartestian product
airport.years <- merge(us.airports, data.frame(year = 2010:2014)) %>%
mutate(City = as.character(City), IATA = as.character(IATA)) %>%
# For some reason San Diego doesn't work, so add the 'K'.
mutate(IATA = ifelse(IATA == 'SAN', 'KSAN', IATA))
temp.data <- airport.years %>%
group_by(City, IATA, year) %>% do({
df <- getWeatherForYear(.$IATA, .$year) %>%
mutate(Date = as.character(Date))
# Anyone know of a coalesce function??
if(is.null(df)) data.frame() else df
}) %>%
mutate(Date = as.Date(Date))
write.csv(temp.data, 'data/temp_data.csv', row.names = FALSE)
for.fit <- temp.data %>%
group_by(year) %>%
mutate(doy = as.numeric( Date - min(Date)),
# 0-1 scale for leap years.
doy.scaled = doy / as.numeric(max(Date) - min(Date)))
# Splines FTW!
m <- gam(Mean_TemperatureF ~ s(doy.scaled, bs = 'cc', by = factor(City)),
data = for.fit)
yhat <- predict(m, newdata = for.fit)
for.fit$yhat <- as.numeric(yhat)
# Typical year plot
cool.cities <- for.fit %>% filter(IATA %in% c('SFO', 'JFK', 'PHL', 'DCA', 'ORD', 'LAX', 'KSAN'))
ggplot(cool.cities, aes(x = Date, y = Mean_TemperatureF, colour = City, group = City)) +
geom_point(size = 1) +
geom_line(aes(y = yhat), size = 1) +
theme_bw() +
scale_colour_brewer(palette = 'Spectral') +
ylab('Mean Temp (F)')
# P(good day) plot.
temp.data %>%
group_by(City) %>%
mutate(good.day = (Min_TemperatureF > 50)*(Max_TemperatureF < 80)) %>%
summarise(perc.good = mean(good.day)) %>%
ungroup %>%
ggplot(aes(x = reorder(City, perc.good), y = perc.good)) +
geom_bar(stat = 'identity') +
theme_bw() +
coord_flip() +
xlab('City') + ylab('P(Good Day)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment