Instantly share code, notes, and snippets.

Embed
What would you like to do?
Weather plots
## For blog post here: https://mathewkiang.com/2017/12/02/tldr-san-diego-weather-is-better-than-boston-weather/
## Imports ----
library(tidyverse)
library(GSODR)
library(ggmap)
## Define places we are interested in and google maps query ----
city_dict <- list(sandiego = "san diego airport",
boston = "boston logan")
## Query google maps for latitude longitude ----
ll <- ggmap::geocode(unlist(city_dict, use.names = FALSE))
## Turn geocode query into a dataframe ----
cities <- data.frame(list(city = names(city_dict),
query = unlist(city_dict, use.names = FALSE),
lon = ll$lon,
lat = ll$lat),
stringsAsFactors = FALSE)
## Find the stations within 10 miles of our geocode ----
city_station <- list()
for (r in 1:nrow(cities)) {
print(cities[r, 'search_q'])
temp_station <- nearest_stations(LAT = cities[r, 'lat'],
LON = cities[r, 'lon'],
distance = 10 * 1/.62)
city_station[[cities[r, 'city']]] <- temp_station
rm(temp_station)
}
## Join stations to our city/geocode dataframe ----
cs_df <- NULL
for (i in seq_along(city_station)) {
temp_cs <- cbind(city = rep(names(city_station)[i],
length(city_station[[i]])),
stnid = city_station[[i]])
cs_df <- rbind(cs_df, temp_cs)
rm(temp_cs)
}
cities <- left_join(cities,
as_data_frame(cs_df),
by = "city")
## Save cities dataframe ----
dir.create(path = './data', showWarnings = FALSE, recursive = TRUE)
write_csv(cities, path = "./data/cities_df.csv")
## Download all cities for ~60 years and save the raw file ----
weather_df <- get_GSOD(years = 1950:2017, station = cities$stnid)
write_csv(weather_df, path = "./data/raw_weather_data.csv.gz")
## For blog post here: https://mathewkiang.com/2017/12/02/tldr-san-diego-weather-is-better-than-boston-weather/
## Imports ----
library(tidyverse)
library(GSODR)
library(ggmap)
library(RColorBrewer)
## Helpers ----
## Convert C to F
c_to_f <- function(temp) {
return(temp * 9/5 + 32)
}
## For x-axis
days_in_mo <- function() {
return(c(31, 28, 31, 30, 31, 30, 31,
31, 30, 31, 30, 31))
}
## For hlines
seq_gen <- function(df) {
lo <- floor(min(df$min_temp, na.rm = TRUE) / 10) * 10
hi <- ceiling(max(df$max_temp, na.rm = TRUE) / 10) * 10
return(seq(lo, hi, 10))
}
## Plotting theme
mk_weather <- function(...) {
## Colors — stick with the ggplot2() greys
c_bg <- "white"
c_grid <- "grey50"
c_btext <- "grey5"
c_mtext <- "grey30"
# Begin construction of chart
theme_bw(base_size = 12, base_family = "Arial Narrow") +
# Region
theme(panel.background = element_rect(fill = c_bg, color = c_bg),
plot.background = element_rect(fill = c_bg, color = c_bg),
panel.border = element_rect(color = c_bg)) +
# Grid
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length = unit(0, "cm")) +
# Legend
theme(legend.position = "none",
legend.justification = c(0, 1),
legend.direction = "vertical",
legend.key = element_rect(fill = NA, color = NA),
legend.background = element_rect(fill = "transparent", color = NA),
legend.text = element_text(color = c_mtext)) +
# Titles, labels, etc.
theme(plot.title = element_text(color = c_btext, vjust = 1.25,
face = "bold", size = 18),
axis.text = element_text(size = 10, color = c_mtext),
axis.text.x = element_text(size = 10, color = c_mtext,
hjust = .5),
axis.title.x = element_text(size = 12, color = c_mtext,
hjust = 1),
axis.title.y = element_text(size = 12, color = c_mtext,
hjust = 1)) +
# Facets
theme(strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 10, color = c_btext)) +
# Plot margins
theme(plot.margin = unit(c(0.35, .2, 0.3, 0.35), "cm")) +
# Additionals
theme(...)
}
## Read in data: assumes you ran `01_get_data.R` ----
cities <- read_csv('./data/cities_data.csv')
weather_df <- read_csv('./data/raw_weather_data.csv.gz')
## Remove columns we won't use, rename, clean up, etc. ----
## Also filter out leap days because it's easier and I'm lazy.
sub_df <- weather_df %>%
tbl_df() %>%
setNames(tolower(names(.))) %>%
select(stnid, stn_name, country = ctry, state, lat, lon,
year, month, day, temp, visib, wdsp, max_temp = max,
min_temp = min, prcp, prcp_flag, rh) %>%
left_join(select(cities, stnid, city, city_lon = lon, city_lat = lat),
by = "stnid") %>%
mutate(month = as.integer(month),
day = as.integer(day),
year = as.integer(year),
prcp = as.numeric(prcp)) %>%
filter(!(day == 29 & month == 2)) %>%
group_by(stnid, year) %>%
arrange(year, month, day) %>%
mutate(doy = 1:n())
## Now get the things we will need for the NYTimes graph: ----
## (1) Record high/low by city
## (2) Daily average high/low by city
## (2) High and lows for 2017
## (3) Days in 2017 that were record high/low
## (3) Weather for 2017
weather_2017 <- sub_df %>%
filter(year == 2017) %>%
group_by(city, doy) %>%
select(year_max = max_temp,
year_min = min_temp,
doy, city) %>%
summarize(year_max = c_to_f(max(year_max, na.rm = TRUE)),
year_min = c_to_f(min(year_min, na.rm = TRUE)))
## (1), (2), and (4)
by_city_summary <- sub_df %>%
group_by(city, doy) %>%
summarize(mean_hi = c_to_f(mean(max_temp, na.rm = TRUE)),
mean_lo = c_to_f(mean(min_temp, na.rm = TRUE)),
max_temp = c_to_f(max(max_temp, na.rm = TRUE)),
min_temp = c_to_f(min(min_temp, na.rm = TRUE)),
stn_name = first(stn_name),
stnid = first(stnid),
county = first(country),
state = first(state),
lat = first(lat),
lon = first(lon),
city_lon = first(city_lon),
city_lat = first(city_lat))
by_city_summary <- by_city_summary %>%
left_join(weather_2017, by = c("doy", "city")) %>%
mutate(record = ifelse(year_max == max_temp, year_max,
ifelse(year_min == min_temp, year_min, NA)),
record_col = ifelse(year_max == max_temp, "record high",
ifelse(year_min == min_temp, "record low", NA)),
record_y = ifelse(record_col == "record high", year_max + 3.5,
ifelse(record_col == "record low", year_min - 3.5,
NA)),
record_yend = ifelse(record_col == "record high", year_max + .5,
ifelse(record_col == "record low", year_min - .5,
NA)))
## San Diego plot ----
sd_data <- filter(by_city_summary, city == "sandiego")
sdplot <- ggplot(sd_data, aes(x = doy)) +
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])),
color = "black", alpha = .5, linetype = "dotted") +
geom_linerange(aes(ymax = max_temp, ymin = min_temp),
color = "tan", alpha = .15, size = 1) +
geom_linerange(aes(ymax = mean_hi, ymin = mean_lo),
color = "plum4", alpha = .75, size = .75) +
geom_hline(yintercept = seq_gen(sd_data),
color = "white", alpha = 1) +
geom_linerange(aes(ymax = year_max, ymin = year_min),
color = "deeppink4", alpha = .75, size = .5) +
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70",
linetype = "solid") +
geom_point(aes(y = record), color = "white", size = 2, alpha = .75) +
geom_point(aes(y = record), color = "black", size = 1.5, alpha = .75) +
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) +
scale_y_continuous(expression("Temperature ("*degree*"F)"),
labels = function(x) {
parse(text = paste0(x, "*degree"))
},
sec.axis =
sec_axis(trans = ~ (. -32) * 5/9,
labels = function(x) {
parse(text = paste0(x, "*degree"))
},
name = expression("Temperature ("*degree*"C)")),
breaks = seq_gen(sd_data),
expand = c(0, 0)) +
scale_x_continuous(NULL, expand = c(0, 1.25),
breaks = days_in_mo()/2 +
cumsum(c(0, days_in_mo()[-12])),
labels = c("January", "February", "March", "April",
"May", "June", "July", "August", "September",
"October", "November", "December")) +
labs(title = "Temperature in San Diego, CA",
subtitle = paste0("Light brown bars represent the record high and low ",
"temperature for each day since 1950.",
'\nViolet bars represent the average daily high ',
'and low temperature.',
"\nPurple bars represent the daily high/low",
" observed in 2017.",
"\nBlack points represent record highs/lows that",
" were set or tied in 2017."))
ggsave(sdplot, filename = "./san_diego.png",
height = 4, width = 6.5, scale = 1.35, dpi = 300)
## Boston ----
bosdata <- filter(by_city_summary, city == "boston")
bosplot <- ggplot(bosdata, aes(x = doy)) +
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])),
color = "black", alpha = .5, linetype = "dotted") +
geom_linerange(aes(ymax = max_temp, ymin = min_temp),
color = "tan", alpha = .15, size = 1) +
geom_linerange(aes(ymax = mean_hi, ymin = mean_lo),
color = "plum4", alpha = .75, size = 1) +
geom_hline(yintercept = seq_gen(bosdata),
color = "white", alpha = 1) +
geom_linerange(aes(ymax = year_max, ymin = year_min),
color = "deeppink4", alpha = .75, size = .5) +
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70",
linetype = "solid") +
geom_point(aes(y = record), color = "white", size = 2, alpha = .75) +
geom_point(aes(y = record), color = "black", size = 1.5, alpha = .75) +
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) +
scale_y_continuous(expression("Temperature ("*degree*"F)"),
labels = function(x) {
parse(text = paste0(x, "*degree"))
},
sec.axis =
sec_axis(trans = ~ (. -32) * 5/9,
labels = function(x) {
parse(text = paste0(x, "*degree"))
},
name = expression("Temperature ("*degree*"C)")),
breaks = seq_gen(bosdata),
expand = c(0, 0)) +
scale_x_continuous(NULL,
expand = c(0, 1.25),
breaks = days_in_mo()/2 +
cumsum(c(0, days_in_mo()[-12])),
labels = c("January", "February", "March", "April",
"May", "June", "July", "August", "September",
"October", "November", "December")) +
labs(title = "Temperature in Boston, MA",
subtitle = paste0("Light brown bars represent the record high and low ",
"temperature for each day since 1950.",
'\nViolet bars represent the average daily high ',
'and low temperature.',
"\nPurple bars represent the daily high/low",
" observed in 2017.",
"\nBlack points represent record highs/lows that",
" were set or tied in 2017."))
ggsave(bosplot, filename = "./boston.png",
height = 4, width = 6.5, scale = 1.35, dpi = 300)
## Mean hi/lo: boston vs san diego -----
sdvsbos <- ggplot(by_city_summary,
aes(x = doy, ymax = mean_hi, ymin = mean_lo,
color = city, fill = city)) +
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])),
color = "black", alpha = .5, linetype = "dotted") +
geom_ribbon(aes(ymax = mean_hi, ymin = mean_lo), color = NA, alpha = .5) +
geom_hline(yintercept = seq(20, 90, 10),
color = "white", alpha = 1) +
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70",
linetype = "solid") +
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) +
scale_y_continuous(expression("Temperature ("*degree*"F)"),
labels = function(x) {
parse(text = paste0(x, "*degree"))
},
sec.axis =
sec_axis(trans = ~ (. -32) * 5/9,
labels = function(x) {
parse(text = paste0(x, "*degree"))
},
name = expression("Temperature ("*degree*"C)")),
breaks = seq(20, 90, 10),
expand = c(0, 0)) +
scale_x_continuous(NULL,
expand = c(0, 1.25),
breaks = days_in_mo()/2 +
cumsum(c(0, days_in_mo()[-12])),
labels = c("January", "February", "March", "April",
"May", "June", "July", "August", "September",
"October", "November", "December")) +
labs(title = "Mean daily high and low temperature",
subtitle = "Boston vs San Diego, 1950-2016")
ggsave(sdvsbos, filename = "./bostonvssandiego.png",
height = 4, width = 6.5, scale = 1.35, dpi = 300)
## Cumulative percipitation ----
rain_df <- sub_df %>%
filter(grepl(pattern = "INTERNATIONAL AIRPORT",
stn_name)) %>%
group_by(stn_name, year) %>%
mutate(cumulative_prcp = cumsum(ifelse(is.na(prcp), 0, prcp)),
city_name = factor(city,
levels = c("boston", "sandiego"),
labels = c("Boston, MA", "San Diego, CA"),
ordered = TRUE))
rain_plot <- ggplot(rain_df,
aes(x = doy, y = cumulative_prcp/100, group = year, color = -year)) +
geom_line(alpha = .35) +
facet_grid(~ city_name) +
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])),
color = "black", alpha = .5, linetype = "dotted") +
geom_hline(yintercept = seq(0, 22.5, 5),
color = "white", alpha = 0) +
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70",
linetype = "solid") +
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) +
scale_y_continuous("Precipitation (in)",
sec.axis =
sec_axis(trans = ~ . * 2.54,
name = "Precipitation (cm)"),
expand = c(0, 0)) +
scale_x_continuous(NULL,
expand = c(0, 1.25),
breaks = days_in_mo()/2 +
cumsum(c(0, days_in_mo()[-12])),
labels = c("January", "February", "March", "April",
"May", "June", "July", "August", "September",
"October", "November", "December")) +
labs(title = "Cumulative precipitation by year",
subtitle = "Boston vs San Diego, 1950-2016")
ggsave(rain_plot, filename = "./cumulative_rain.png",
height = 4, width = 10, scale = 1.6, dpi = 300)
## Temperature difference from the day before ----
test <- sub_df %>%
filter(grepl(pattern = "INTERNATIONAL AIRPORT",
stn_name)) %>%
group_by(stn_name, year) %>%
arrange(year, doy) %>%
mutate(temp_lag_1 = temp - lag(temp),
city_name = factor(city,
levels = c("boston", "sandiego"),
labels = c("Boston, MA", "San Diego, CA"),
ordered = TRUE))
temperature_diff <- ggplot(test,
aes(x = doy, y = temp_lag_1,
group = year, color = -year)) +
geom_line(alpha = .15) +
facet_grid(~ city_name) +
geom_vline(xintercept = cumsum(c(days_in_mo()[-12])),
color = "black", alpha = .5, linetype = "dotted") +
geom_hline(yintercept = seq(-20, 20, 10),
color = "white", alpha = 1) +
geom_vline(xintercept = c(0, 366), alpha = 1, color = "grey70",
linetype = "solid") +
mk_weather(axis.title.y.right = element_text(hjust = 1, angle = 90)) +
scale_y_continuous(expression("Temperature ("*degree*"F)"),
labels = function(x) {
parse(text = paste0(x, "*degree"))
},
breaks = seq(-20, 20, 5),
expand = c(0, 0)) +
scale_x_continuous(NULL,
expand = c(0, 1.25),
breaks = days_in_mo()/2 +
cumsum(c(0, days_in_mo()[-12])),
labels = c("January", "February", "March", "April",
"May", "June", "July", "August", "September",
"October", "November", "December")) +
labs(title = "Daily Temperature Change",
subtitle = "Boston vs San Diego, 1950-2016")
ggsave(temperature_diff, filename = "./temp_diff.png",
height = 4, width = 10, scale = 1.6, dpi = 200)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment