Weather plots
## For blog post here: https://mathewkiang.com/2017/12/02/tldr-san-diego-weather-is-better-than-boston-weather/ | |
## Imports ---- | |
library(tidyverse) | |
library(GSODR) | |
library(ggmap) | |
## Define places we are interested in and google maps query ---- | |
city_dict <- list(sandiego = "san diego airport", | |
boston = "boston logan") | |
## Query google maps for latitude longitude ---- | |
ll <- ggmap::geocode(unlist(city_dict, use.names = FALSE)) | |
## Turn geocode query into a dataframe ---- | |
cities <- data.frame(list(city = names(city_dict), | |
query = unlist(city_dict, use.names = FALSE), | |
lon = ll$lon, | |
lat = ll$lat), | |
stringsAsFactors = FALSE) | |
## Find the stations within 10 miles of our geocode ---- | |
city_station <- list() | |
for (r in 1:nrow(cities)) { | |
print(cities[r, 'search_q']) | |
temp_station <- nearest_stations(LAT = cities[r, 'lat'], | |
LON = cities[r, 'lon'], | |
distance = 10 * 1/.62) | |
city_station[[cities[r, 'city']]] <- temp_station | |
rm(temp_station) | |
} | |
## Join stations to our city/geocode dataframe ---- | |
cs_df <- NULL | |
for (i in seq_along(city_station)) { | |
temp_cs <- cbind(city = rep(names(city_station)[i], | |
length(city_station[[i]])), | |
stnid = city_station[[i]]) | |
cs_df <- rbind(cs_df, temp_cs) | |
rm(temp_cs) | |
} | |
cities <- left_join(cities, | |
as_data_frame(cs_df), | |
by = "city") | |
## Save cities dataframe ---- | |
dir.create(path = './data', showWarnings = FALSE, recursive = TRUE) | |
write_csv(cities, path = "./data/cities_df.csv") | |
## Download all cities for ~60 years and save the raw file ---- | |
weather_df <- get_GSOD(years = 1950:2017, station = cities$stnid) | |
write_csv(weather_df, path = "./data/raw_weather_data.csv.gz") |
## For blog post here: https://mathewkiang.com/2017/12/02/tldr-san-diego-weather-is-better-than-boston-weather/ | |
## Imports ---- | |
library(tidyverse) | |
library(GSODR) | |
library(ggmap) | |
library(RColorBrewer) | |
## Helpers ---- | |
## Convert C to F | |
c_to_f <- function(temp) { | |
return(temp * 9/5 + 32) | |
} | |
## For x-axis | |
days_in_mo <- function() { | |
return(c(31, 28, 31, 30, 31, 30, 31, | |
31, 30, 31, 30, 31)) | |
} | |
## For hlines | |
seq_gen <- function(df) { | |
lo <- floor(min(df$min_temp, na.rm = TRUE) / 10) * 10 | |
hi <- ceiling(max(df$max_temp, na.rm = TRUE) / 10) * 10 | |
return(seq(lo, hi, 10)) | |
} | |
## Plotting theme | |
mk_weather <- function(...) { | |
## Colors — stick with the ggplot2() greys | |
c_bg <- "white" | |
c_grid <- "grey50" | |
c_btext <- "grey5" | |
c_mtext <- "grey30" | |
# Begin construction of chart | |
theme_bw(base_size = 12, base_family = "Arial Narrow") + | |
# Region | |
theme(panel.background = element_rect(fill = c_bg, color = c_bg), | |
plot.background = element_rect(fill = c_bg, color = c_bg), | |
panel.border = element_rect(color = c_bg)) + | |
# Grid | |
theme(panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank(), | |
axis.ticks = element_blank(), | |
axis.line.x = element_blank(), | |
axis.ticks.x = element_blank(), | |
axis.ticks.length = unit(0, "cm")) + | |
# Legend | |
theme(legend.position = "none", | |
legend.justification = c(0, 1), | |
legend.direction = "vertical", | |
legend.key = element_rect(fill = NA, color = NA), | |
legend.background = element_rect(fill = "transparent", color = NA), | |
legend.text = element_text(color = c_mtext)) + | |
# Titles, labels, etc. | |
theme(plot.title = element_text(color = c_btext, vjust = 1.25, | |
face = "bold", size = 18), | |
axis.text = element_text(size = 10, color = c_mtext), | |
axis.text.x = element_text(size = 10, color = c_mtext, | |
hjust = .5), | |
axis.title.x = element_text(size = 12, color = c_mtext, | |
hjust = 1), | |
axis.title.y = element_text(size = 12, color = c_mtext, | |
hjust = 1)) + | |
# Facets | |
theme(strip.background = element_rect(fill = "white", color = "black"), | |
strip.text = element_text(size = 10, color = c_btext)) + | |
# Plot margins | |
theme(plot.margin = unit(c(0.35, .2, 0.3, 0.35), "cm")) + | |
# Additionals | |
theme(...) | |
} | |
## Read in data: assumes you ran `01_get_data.R` ---- | |
cities <- read_csv('./data/cities_data.csv') | |
weather_df <- read_csv('./data/raw_weather_data.csv.gz') | |
## Remove columns we won't use, rename, clean up, etc. ---- | |
## Also filter out leap days because it's easier and I'm lazy. | |
sub_df <- weather_df %>% | |
tbl_df() %>% | |
setNames(tolower(names(.))) %>% | |
select(stnid, stn_name, country = ctry, state, lat, lon, | |
year, month, day, temp, visib, wdsp, max_temp = max, | |
min_temp = min, prcp, prcp_flag, rh) %>% | |
left_join(select(cities, stnid, city, city_lon = lon, city_lat = lat), | |
by = "stnid") %>% | |
mutate(month = as.integer(month), | |
day = as.integer(day), | |
year = as.integer(year), | |
prcp = as.numeric(prcp)) %>% | |
filter(!(day == 29 & month == 2)) %>% | |
group_by(stnid, year) %>% | |
arrange(year, month, day) %>% | |
mutate(doy = 1:n()) | |
## Now get the things we will need for the NYTimes graph: ---- | |
## (1) Record high/low by city | |
## (2) Daily average high/low by city | |
## (2) High and lows for 2017 | |
## (3) Days in 2017 that were record high/low | |
## (3) Weather for 2017 | |
weather_2017 <- sub_df %>% | |
filter(year == 2017) %>% | |
group_by(city, doy) %>% | |
select(year_max = max_temp, | |
year_min = min_temp, | |
doy, city) %>% | |
summarize(year_max = c_to_f(max(year_max, na.rm = TRUE)), | |
year_min = c_to_f(min(year_min, na.rm = TRUE))) | |
## (1), (2), and (4) | |
by_city_summary <- sub_df %>% | |
group_by(city, doy) %>% | |
summarize(mean_hi = c_to_f(mean(max_temp, na.rm = TRUE)), | |
mean_lo = c_to_f(mean(min_temp, na.rm = TRUE)), | |
max_temp = c_to_f(max(max_temp, na.rm = TRUE)), | |
min_temp = c_to_f(min(min_temp, na.rm = TRUE)), | |
stn_name = first(stn_name), | |
stnid = first(stnid), | |
county = first(country), | |
state = first(state), | |
lat = first(lat), | |
lon = first(lon), | |
city_lon = first(city_lon), | |
city_lat = first(city_lat)) | |
by_city_summary <- by_city_summary %>% | |
left_join(weather_2017, by = c("doy", "city")) %>% | |
mutate(record = ifelse(year_max == max_temp, year_max, | |
ifelse(year_min == min_temp, year_min, NA)), | |
record_col = ifelse(year_max == max_temp, "record high", | |
ifelse(year_min == min_temp, "record low", NA)), | |
record_y = ifelse(record_col == "record high", year_max + 3.5, | |
ifelse(record_col == "record low", year_min - 3.5, | |
NA)), | |
record_yend = ifelse(record_col == "record high", year_max + .5, | |
ifelse(record_col == "record low", year_min - .5, | |
NA))) | |
## San Diego plot ---- | |
sd_data <- filter(by_city_summary, city == "sandiego") | |
sdplot <- ggplot(sd_data, aes(x = doy)) + | |
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])), | |
color = "black", alpha = .5, linetype = "dotted") + | |
geom_linerange(aes(ymax = max_temp, ymin = min_temp), | |
color = "tan", alpha = .15, size = 1) + | |
geom_linerange(aes(ymax = mean_hi, ymin = mean_lo), | |
color = "plum4", alpha = .75, size = .75) + | |
geom_hline(yintercept = seq_gen(sd_data), | |
color = "white", alpha = 1) + | |
geom_linerange(aes(ymax = year_max, ymin = year_min), | |
color = "deeppink4", alpha = .75, size = .5) + | |
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70", | |
linetype = "solid") + | |
geom_point(aes(y = record), color = "white", size = 2, alpha = .75) + | |
geom_point(aes(y = record), color = "black", size = 1.5, alpha = .75) + | |
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) + | |
scale_y_continuous(expression("Temperature ("*degree*"F)"), | |
labels = function(x) { | |
parse(text = paste0(x, "*degree")) | |
}, | |
sec.axis = | |
sec_axis(trans = ~ (. -32) * 5/9, | |
labels = function(x) { | |
parse(text = paste0(x, "*degree")) | |
}, | |
name = expression("Temperature ("*degree*"C)")), | |
breaks = seq_gen(sd_data), | |
expand = c(0, 0)) + | |
scale_x_continuous(NULL, expand = c(0, 1.25), | |
breaks = days_in_mo()/2 + | |
cumsum(c(0, days_in_mo()[-12])), | |
labels = c("January", "February", "March", "April", | |
"May", "June", "July", "August", "September", | |
"October", "November", "December")) + | |
labs(title = "Temperature in San Diego, CA", | |
subtitle = paste0("Light brown bars represent the record high and low ", | |
"temperature for each day since 1950.", | |
'\nViolet bars represent the average daily high ', | |
'and low temperature.', | |
"\nPurple bars represent the daily high/low", | |
" observed in 2017.", | |
"\nBlack points represent record highs/lows that", | |
" were set or tied in 2017.")) | |
ggsave(sdplot, filename = "./san_diego.png", | |
height = 4, width = 6.5, scale = 1.35, dpi = 300) | |
## Boston ---- | |
bosdata <- filter(by_city_summary, city == "boston") | |
bosplot <- ggplot(bosdata, aes(x = doy)) + | |
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])), | |
color = "black", alpha = .5, linetype = "dotted") + | |
geom_linerange(aes(ymax = max_temp, ymin = min_temp), | |
color = "tan", alpha = .15, size = 1) + | |
geom_linerange(aes(ymax = mean_hi, ymin = mean_lo), | |
color = "plum4", alpha = .75, size = 1) + | |
geom_hline(yintercept = seq_gen(bosdata), | |
color = "white", alpha = 1) + | |
geom_linerange(aes(ymax = year_max, ymin = year_min), | |
color = "deeppink4", alpha = .75, size = .5) + | |
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70", | |
linetype = "solid") + | |
geom_point(aes(y = record), color = "white", size = 2, alpha = .75) + | |
geom_point(aes(y = record), color = "black", size = 1.5, alpha = .75) + | |
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) + | |
scale_y_continuous(expression("Temperature ("*degree*"F)"), | |
labels = function(x) { | |
parse(text = paste0(x, "*degree")) | |
}, | |
sec.axis = | |
sec_axis(trans = ~ (. -32) * 5/9, | |
labels = function(x) { | |
parse(text = paste0(x, "*degree")) | |
}, | |
name = expression("Temperature ("*degree*"C)")), | |
breaks = seq_gen(bosdata), | |
expand = c(0, 0)) + | |
scale_x_continuous(NULL, | |
expand = c(0, 1.25), | |
breaks = days_in_mo()/2 + | |
cumsum(c(0, days_in_mo()[-12])), | |
labels = c("January", "February", "March", "April", | |
"May", "June", "July", "August", "September", | |
"October", "November", "December")) + | |
labs(title = "Temperature in Boston, MA", | |
subtitle = paste0("Light brown bars represent the record high and low ", | |
"temperature for each day since 1950.", | |
'\nViolet bars represent the average daily high ', | |
'and low temperature.', | |
"\nPurple bars represent the daily high/low", | |
" observed in 2017.", | |
"\nBlack points represent record highs/lows that", | |
" were set or tied in 2017.")) | |
ggsave(bosplot, filename = "./boston.png", | |
height = 4, width = 6.5, scale = 1.35, dpi = 300) | |
## Mean hi/lo: boston vs san diego ----- | |
sdvsbos <- ggplot(by_city_summary, | |
aes(x = doy, ymax = mean_hi, ymin = mean_lo, | |
color = city, fill = city)) + | |
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])), | |
color = "black", alpha = .5, linetype = "dotted") + | |
geom_ribbon(aes(ymax = mean_hi, ymin = mean_lo), color = NA, alpha = .5) + | |
geom_hline(yintercept = seq(20, 90, 10), | |
color = "white", alpha = 1) + | |
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70", | |
linetype = "solid") + | |
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) + | |
scale_y_continuous(expression("Temperature ("*degree*"F)"), | |
labels = function(x) { | |
parse(text = paste0(x, "*degree")) | |
}, | |
sec.axis = | |
sec_axis(trans = ~ (. -32) * 5/9, | |
labels = function(x) { | |
parse(text = paste0(x, "*degree")) | |
}, | |
name = expression("Temperature ("*degree*"C)")), | |
breaks = seq(20, 90, 10), | |
expand = c(0, 0)) + | |
scale_x_continuous(NULL, | |
expand = c(0, 1.25), | |
breaks = days_in_mo()/2 + | |
cumsum(c(0, days_in_mo()[-12])), | |
labels = c("January", "February", "March", "April", | |
"May", "June", "July", "August", "September", | |
"October", "November", "December")) + | |
labs(title = "Mean daily high and low temperature", | |
subtitle = "Boston vs San Diego, 1950-2016") | |
ggsave(sdvsbos, filename = "./bostonvssandiego.png", | |
height = 4, width = 6.5, scale = 1.35, dpi = 300) | |
## Cumulative percipitation ---- | |
rain_df <- sub_df %>% | |
filter(grepl(pattern = "INTERNATIONAL AIRPORT", | |
stn_name)) %>% | |
group_by(stn_name, year) %>% | |
mutate(cumulative_prcp = cumsum(ifelse(is.na(prcp), 0, prcp)), | |
city_name = factor(city, | |
levels = c("boston", "sandiego"), | |
labels = c("Boston, MA", "San Diego, CA"), | |
ordered = TRUE)) | |
rain_plot <- ggplot(rain_df, | |
aes(x = doy, y = cumulative_prcp/100, group = year, color = -year)) + | |
geom_line(alpha = .35) + | |
facet_grid(~ city_name) + | |
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])), | |
color = "black", alpha = .5, linetype = "dotted") + | |
geom_hline(yintercept = seq(0, 22.5, 5), | |
color = "white", alpha = 0) + | |
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70", | |
linetype = "solid") + | |
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) + | |
scale_y_continuous("Precipitation (in)", | |
sec.axis = | |
sec_axis(trans = ~ . * 2.54, | |
name = "Precipitation (cm)"), | |
expand = c(0, 0)) + | |
scale_x_continuous(NULL, | |
expand = c(0, 1.25), | |
breaks = days_in_mo()/2 + | |
cumsum(c(0, days_in_mo()[-12])), | |
labels = c("January", "February", "March", "April", | |
"May", "June", "July", "August", "September", | |
"October", "November", "December")) + | |
labs(title = "Cumulative precipitation by year", | |
subtitle = "Boston vs San Diego, 1950-2016") | |
ggsave(rain_plot, filename = "./cumulative_rain.png", | |
height = 4, width = 10, scale = 1.6, dpi = 300) | |
## Temperature difference from the day before ---- | |
test <- sub_df %>% | |
filter(grepl(pattern = "INTERNATIONAL AIRPORT", | |
stn_name)) %>% | |
group_by(stn_name, year) %>% | |
arrange(year, doy) %>% | |
mutate(temp_lag_1 = temp - lag(temp), | |
city_name = factor(city, | |
levels = c("boston", "sandiego"), | |
labels = c("Boston, MA", "San Diego, CA"), | |
ordered = TRUE)) | |
temperature_diff <- ggplot(test, | |
aes(x = doy, y = temp_lag_1, | |
group = year, color = -year)) + | |
geom_line(alpha = .15) + | |
facet_grid(~ city_name) + | |
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])), | |
color = "black", alpha = .5, linetype = "dotted") + | |
geom_hline(yintercept = seq(-20, 20, 10), | |
color = "white", alpha = 1) + | |
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70", | |
linetype = "solid") + | |
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) + | |
scale_y_continuous(expression("Temperature ("*degree*"F)"), | |
labels = function(x) { | |
parse(text = paste0(x, "*degree")) | |
}, | |
breaks = seq(-20, 20, 5), | |
expand = c(0, 0)) + | |
scale_x_continuous(NULL, | |
expand = c(0, 1.25), | |
breaks = days_in_mo()/2 + | |
cumsum(c(0, days_in_mo()[-12])), | |
labels = c("January", "February", "March", "April", | |
"May", "June", "July", "August", "September", | |
"October", "November", "December")) + | |
labs(title = "Daily Temperature Change", | |
subtitle = "Boston vs San Diego, 1950-2016") | |
ggsave(temperature_diff, filename = "./temp_diff.png", | |
height = 4, width = 10, scale = 1.6, dpi = 200) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment