-
-
Save morrisseyj/3f88db1afb4bc2562d3648a523558d0f to your computer and use it in GitHub Desktop.
Heat record visualization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The code below supports the analysis informing the accompanying blog post.
# Note: this pertains only to the animated visualization, not the bar chart on that blog.
library('ncdf4') # NetCDF reading/manipulation -- the Berkeley Earth grids ship as .nc files
library('ggplot2') # plotting
library('tidyverse') # data wrangling (dplyr, tidyr, stringr, ...)
library('lubridate') # date handling (leap_year(), year())
library('gganimate') # frame-based animation of ggplots
library('magick') # image manipulation -- used to stitch the two animations together
library('zoo') # na.approx() for linear interpolation over NA runs
library('purrr') # map() to iterate the extraction function over time layers
# Download the Berkeley Earth gridded (equal-area) high/low temperature
# anomaly files. Source index: http://berkeleyearth.org/data/
# mode = 'wb' is required so the binary NetCDF files are not corrupted on Windows.
download.file('http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TMAX_EqualArea.nc',
              'berk_tmax_equal_area.nc', mode = 'wb')
download.file('http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TMIN_EqualArea.nc',
              'berk_tmin_equal_area.nc', mode = 'wb')

# Open connections to the two NetCDF files (data is not read into memory yet).
tmax_nc_berk <- nc_open('berk_tmax_equal_area.nc')
tmin_nc_berk <- nc_open('berk_tmin_equal_area.nc')
# Structure of the NetCDF data: a 2-D temperature-anomaly array
# (grid cell x time) plus matching longitude/latitude vectors.
# Plan: extract the slice for every time step, attach lon/lat, and stack
# everything into a single data frame.

# Pull the full anomaly array (dimension 1 = grid cell, dimension 2 = time).
temp_anom_array <- ncvar_get(tmax_nc_berk, 'temperature')

# Index sequence over the time dimension (dimension 2).
# seq_len() is safe even for a zero-length dimension, where 1:n would yield c(1, 0).
time_layers <- seq_len(dim(temp_anom_array)[2])
#' Extract one time slice of a Berkeley Earth NetCDF file as a data frame.
#'
#' @param df An open ncdf4 connection (result of nc_open()).
#'   NOTE(review): the name 'df' is kept for backward compatibility with the
#'   callers below, but this is an ncdf4 object, not a data frame.
#' @param x  Integer index into the time dimension of the 'temperature' array.
#' @return A data frame with columns date, lon, lat, temp_anom covering every
#'   grid cell at time index x.
layer_extract <- function(df, x) {
  output <- data.frame(date = ncvar_get(df, 'time')[x],               # decimal-year time stamp
                       lon = ncvar_get(df, 'longitude'),              # per-cell coordinates
                       lat = ncvar_get(df, 'latitude'),
                       temp_anom = ncvar_get(df, 'temperature')[, x]) # anomaly slice at index x
  # Convert the decimal-year stamp (e.g. 1850.041) into a proper Date:
  # split into year and fraction, scale the fraction by the year length,
  # and offset from Jan 1 of that year.
  output <- output %>%
    separate(date, into = c('year', 'day'), sep = '\\.') %>%
    mutate(day = as.numeric(paste0('0.', day))) %>%                   # fraction of year elapsed
    mutate(days = ifelse(leap_year(as.numeric(year)), 366, 365)) %>%  # year length in days
    mutate(day = round(day * days, 0)) %>%                            # day-of-year (rounded)
    # NOTE(review): as.Date() on a numeric offsets from `origin` (the format
    # argument is ignored for numeric input), so this gives Jan 1 + day days
    # and may be off by one day -- immaterial for the annual aggregation below.
    mutate(date = as.Date(day, format = '%j', origin = as.Date(paste0(year, '-01-01')))) %>%
    select(date, lon, lat, temp_anom)
  # Progress indicator -- the full extraction iterates over ~2,000 layers.
  print(paste(x, 'of', max(time_layers), 'done'))
  return(output)
}
# Iterate over every time index, producing one data frame per time slice
# (each holding anomalies for all ~5,000 grid cells).
# NOTE: slow -- roughly 2,054 slices x 5,024 cells, about 10 million values.
list_of_grid_tmax_data <- map(time_layers, layer_extract, df = tmax_nc_berk)
list_of_grid_tmin_data <- map(time_layers, layer_extract, df = tmin_nc_berk)

# Stack the per-slice data frames into one long data frame each.
# bind_rows() yields the same table as do.call('rbind', ...) for
# identically-shaped data frames but avoids the quadratic copying cost.
berk_raw_tmax <- bind_rows(list_of_grid_tmax_data)
berk_raw_tmin <- bind_rows(list_of_grid_tmin_data)
# Join the high- and low-temperature tables into one row per (date, lat, lon)
# with tmax and tmin columns. full_join keeps cells present in only one file
# (the missing side becomes NA and is filtered later).
berk_raw_full <- berk_raw_tmax %>%
  rename(tmax = temp_anom) %>%
  full_join(berk_raw_tmin, by = c('date', 'lat', 'lon')) %>%
  rename(tmin = temp_anom)
# Baseline quantiles per grid cell, used as the "starting record" each
# location must beat: the 75th percentile of annual mean highs (and the 25th
# percentile of annual mean lows) over the 1951-1980 reference period.
berk_annual_iqrs <- berk_raw_full %>%
  filter(!(is.na(tmax) | is.na(tmin))) %>% # drop incomplete cells up front
  # NOTE(review): the strict inequalities exclude 1951-01-01 and 1980-12-31
  # themselves; kept as-is to preserve the published numbers.
  filter(date > as.Date('1951-01-01') & date < as.Date('1980-12-31')) %>%
  mutate(year = as.integer(year(date)),
         lat_lon_pair = paste(lat, lon, sep = '/')) %>% # single grouping key per cell
  group_by(year, lat_lon_pair) %>%
  # Annual means per cell -- monthly resolution would give far too many frames.
  summarize(tmax = mean(tmax, na.rm = TRUE),
            tmin = mean(tmin, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(lat_lon_pair) %>%
  # quantile() with defaults returns c(0%, 25%, 50%, 75%, 100%):
  # element 4 is the 75th percentile, element 2 the 25th.
  summarize(tmax_upper_q = quantile(tmax, na.rm = TRUE)[4],
            tmin_lower_q = quantile(tmin, na.rm = TRUE)[2])
# Identify every (year, cell) in which a temperature record is set, together
# with the anomaly at which it was broken. A "record" year must reach either
# the cell's running max/min to date or its 1951-1980 baseline quantile,
# whichever is more extreme.
berk_records <- berk_raw_full %>%
  filter(!(is.na(tmax) | is.na(tmin))) %>%              # drop incomplete cells
  mutate(year = as.integer(year(date)),
         lat_lon_pair = paste(lat, lon, sep = '/')) %>% # single grouping key per cell
  group_by(year, lat_lon_pair) %>%
  # Annual mean high/low anomaly per cell per year.
  summarize(tmax = mean(tmax, na.rm = TRUE),
            tmin = mean(tmin, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(year) %>%                                     # cummax/cummin require time order
  group_by(lat_lon_pair) %>%
  mutate(cumulative_record_h = cummax(tmax),            # running extremes per cell
         cumulative_record_l = cummin(tmin)) %>%
  left_join(berk_annual_iqrs, by = 'lat_lon_pair') %>%  # attach the baseline quantiles
  # The effective record is the stricter of baseline quantile vs running extreme.
  mutate(record_h = ifelse(tmax_upper_q > cumulative_record_h, tmax_upper_q, cumulative_record_h),
         record_l = ifelse(tmin_lower_q < cumulative_record_l, tmin_lower_q, cumulative_record_l)) %>%
  # A record is broken when the year's mean meets or exceeds the record level
  # (the comparison is already logical -- no ifelse(cond, T, F) needed).
  mutate(record_break_h = tmax >= record_h,
         record_break_l = tmin <= record_l) %>%
  select(year, lat_lon_pair, tmin, tmax, record_break_h, record_break_l) %>%
  separate(lat_lon_pair, into = c('lat', 'lon'), sep = '/') %>%
  mutate(across(c(lat, lon), as.numeric)) %>%           # across() supersedes mutate_at()
  ungroup() %>%
  # One row per record event; 'record' names the kind, 'recordtemp' its anomaly.
  pivot_longer(cols = c(record_break_h, record_break_l), names_to = 'record') %>%
  filter(value == TRUE) %>%
  mutate(recordtemp = ifelse(record == 'record_break_l', tmin, tmax))
# Reshape for animation: each record event gets three "iterations" per year
# with decreasing alpha/size, so the dots shrink and fade over the frames
# that follow their appearance.
berk_record_final <- berk_records %>%
  # Clamp anomalies to [-5, 5] so a few extreme values don't compress the
  # color scale toward the middle (legend labels show '< -5' / '> 5').
  mutate(recordtemp = ifelse(record == 'record_break_l' & recordtemp < -5, -5,
                      ifelse(record == 'record_break_h' & recordtemp > 5, 5, recordtemp))) %>%
  # Three alpha levels per event, one per sub-year frame.
  mutate(alpha_1 = 1, alpha_2 = 0.5, alpha_3 = 0.1) %>%
  # starts_with('alpha_') selects exactly the three columns created above;
  # the original grep('_', colnames(.)) would silently capture any other
  # underscore-named column added upstream.
  pivot_longer(starts_with('alpha_'), names_to = 'iteration', values_to = 'alpha') %>%
  # 'alpha_1' -> 0.1, 'alpha_2' -> 0.2, 'alpha_3' -> 0.3: fractional frame offsets.
  mutate(iteration = as.numeric(gsub('[a-z]+_', '0.', iteration))) %>%
  mutate(iteration = as.double(year) + iteration) %>% # e.g. 1950.1, 1950.2, 1950.3
  # Dot size tracks transparency so fading dots also shrink.
  mutate(size = ifelse(alpha == 1, 1,
                ifelse(alpha == 0.5, 0.5, 0.1))) %>%
  arrange(iteration)
# World polygon outlines for the base map (backed by the 'maps' package).
# Namespaced explicitly because purrr (loaded above) also exports map-related
# names, making bare map_data() easy to confuse.
world <- ggplot2::map_data('world')

# Total animation frames: three sub-year frames per year of data.
# (<- rather than = for top-level assignment, per house style.)
nframes <- length(unique(berk_record_final$year)) * 3
# First animation: world map with record-break dots appearing, then
# shrinking/fading over the three per-year iterations.
a1 <- ggplot() +
  # Country outlines on an otherwise empty plot.
  geom_polygon(data = world,
               aes(x = long, y = lat, group = group),
               fill = NA, col = 'grey') +
  # One point per record event; color = anomaly, size/alpha = fade stage.
  geom_point(data = berk_record_final,
             aes(x = lon, y = lat, col = recordtemp, size = size, alpha = alpha)) +
  # Diverging blue-white-red scale, clamped to [-5, 5] to match the clamped data.
  scale_color_gradient2(low = 'blue', mid = 'white', high = 'red',
                        limits = c(-5, 5), midpoint = 0,
                        labels = c('< -5', '-2.5', '0', '2.5', '> 5'),
                        name = expression(atop('Record Temp', 'Anomaly (' ^o * 'C):'))) +
  theme_minimal(base_size = 13.5) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank()) +
  guides(size = 'none',
         alpha = 'none') +
  labs(x = element_blank(),
       y = element_blank(),
       title = 'Moments when record high and low temperatures were set at the specific location',
       # Three frames per year starting at 1850, so integer-divide the frame
       # counter by 3 to recover the calendar year for the subtitle.
       subtitle = 'Date: {1850 + frame %/% 3}') +
  # transition_manual(): one frame per distinct 'iteration' value, with no
  # tweening -- the per-year iterations above fake the fade-out instead
  # (gganimate's enter/exit fades did not cooperate here).
  transition_manual(iteration)
# Render the animation. height/width are in pixels; the fps here is a
# placeholder -- final frame timing is set when the gif is rewritten with
# magick/gifski at the end of the script.
anim1 <- animate(a1, nframes = nframes, fps = 6, height = 500, width = 600)

# Convert to a magick image object so frames can later be appended
# one-by-one to the second visualization.
anim1_magic <- image_read(anim1)
# Pause the animation on its final frame: render the last (2021) frame as a
# static png, then append 12 copies of it (4 s at the eventual 3 fps).
# NOTE(review): this plot intentionally duplicates the styling of 'a1' above;
# keep the two in sync if either changes.
png(filename = 'a1_last_image.png',
    height = 500, width = 600, units = 'px') # 'units' spelled out (was partial-matched 'unit')
plot <- ggplot() +
  geom_polygon(data = world,
               aes(x = long, y = lat, group = group),
               fill = NA, col = 'grey') +
  geom_point(data = filter(berk_record_final, year == 2021),
             aes(x = lon, y = lat, col = recordtemp, size = size, alpha = alpha)) +
  scale_color_gradient2(low = 'blue', mid = 'white', high = 'red',
                        limits = c(-5, 5), midpoint = 0,
                        labels = c('< -5', '-2.5', '0', '2.5', '> 5'),
                        name = expression(atop('Record Temp', 'Anomaly (' ^o * 'C):'))) +
  theme_minimal(base_size = 13.5) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank()) +
  guides(size = 'none',
         alpha = 'none') +
  labs(x = element_blank(),
       y = element_blank(),
       title = 'Moments when record high and low temperatures were set at the specific location',
       subtitle = 'Date: 2021')
print(plot)
dev.off()

# Read the frame back in as a magick object, then delete the temporary file.
a1_last_img_magic <- image_read('a1_last_image.png')
unlink('a1_last_image.png')

# Full first animation: rendered frames plus 12 repeats of the final frame.
anim1_full_magic <- c(anim1_magic, rep(a1_last_img_magic, 12))
# Second visualization: the cumulative-records line chart.
# gganimate -> magick conversion dropped frames for this plot
# (https://stackoverflow.com/questions/68853820), so frames are generated
# manually below; interpolation between yearly points is also done by hand.
#
# Scaffold: every combination of year (1850-2021), record type, and sub-year
# iteration. The original built this with unnamed mutate() columns and two
# pivots; crossing() produces the identical (year, record, iteration) grid
# directly. 'iteration' is deliberately character ('1'..'3') to match how it
# is tagged and joined in the next step.
temp_berk_inter <- crossing(year = 1850:2021,
                            record = c('record_break_h', 'record_break_l'),
                            iteration = as.character(1:3))
# Two rows per year (one high-record, one low-record) for 1850-2021, used to
# re-insert years in which no record was set at all.
complete_record <- data.frame(year = rep(1850:2021, 2),
                              record = c(rep('record_break_h', 2022 - 1850),
                                         rep('record_break_l', 2022 - 1850))) %>%
  arrange(year)
# Data behind the cumulative-records chart: yearly counts of record events,
# cumulated per record type, with two extra interpolated points per year so
# the line advances smoothly across the three frames each year.
temp_berk <- berk_record_final %>%
  ungroup() %>%
  group_by(year, record) %>%
  summarize(n = n()) %>%              # record events per year per type
  ungroup() %>%                       # drop residual 'year' grouping early
  # Re-insert the years where no record of a given type was set ...
  full_join(complete_record, by = c('year', 'record')) %>%
  arrange(year) %>%
  replace_na(list(n = 0)) %>%         # ... as explicit zero-count years
  group_by(record) %>%
  mutate(cum_n = cumsum(n)) %>%       # running total over the complete year sequence
  ungroup() %>%
  # Tag each observed value as iteration '3' (its year-end position), then
  # join against the full year x record x iteration scaffold so iterations
  # '1' and '2' exist as NA rows to interpolate over.
  mutate(iteration = '3') %>%
  right_join(temp_berk_inter, by = c('year', 'record', 'iteration')) %>%
  arrange(year, record, iteration) %>%
  # na.approx() needs a leading non-NA value, so anchor each series at zero.
  mutate(cum_n = ifelse(year == 1850 & iteration == '1', 0, cum_n)) %>%
  group_by(record) %>%
  mutate(interpol = na.approx(cum_n)) %>% # linear interpolation (zoo)
  ungroup() %>%
  mutate(n = row_number())            # row index for iteration bookkeeping
                                      # (deliberately overwrites the count column)
# Render one png per (year, iteration) frame for the cumulative-records chart
# into a scratch directory, to be read back and joined with magick below.
dir.create('temp_images')

for (y in 1851:2021) {   # one set of frames per year
  for (i in 1:3) {       # three sub-year iterations, matching animation 1
    png(filename = paste0('temp_images/image', y, '-', i, '.png'),
        width = 600, height = 200, units = 'px') # 'units' spelled out (was 'unit')
    # NOTE(review): 'iteration' is character and i is numeric; == coerces i
    # to character, so the comparison works as intended.
    plot <- ggplot(data = filter(temp_berk, (year >= 1850 & year < y) & (iteration == i)),
                   aes(x = year, y = interpol, col = record, group = record)) +
      geom_line(lwd = 1) +
      # Emphasize the current year's position with a large point.
      geom_point(data = filter(temp_berk, (year == y & iteration == i)),
                 aes(x = year, y = interpol, col = record, group = record),
                 size = 4) +
      # Fixed axes so every frame shares one scale; 120117 is the final
      # cumulative count, so the line never leaves the panel.
      coord_cartesian(xlim = c(1850, 2021), ylim = c(0, 120117)) +
      theme_minimal(base_size = 13.5) +
      labs(y = 'Cumulative records',
           x = 'Year') +
      # 'labels' spelled out (the original relied on partial matching of 'label').
      scale_color_manual(name = 'Record:',
                         labels = c('High', 'Low'),
                         values = c('#FF7B7B', '#2A9DF4')) +
      labs(caption = 'Note: Record events have to either exceed the value of the 3rd IQR or be the highest value to date.
Source: Berkley Earth
Credit: Visualization created by James Morrissey')
    print(plot)
    dev.off()
  }
}
# Static copy of the chart's final frame, appended 12 times below so the
# combined gif pauses on the finished chart (mirrors animation 1's pause).
png(filename = 'a2_last_image.png',
    width = 600, height = 200, units = 'px') # 'units' spelled out (was 'unit')
plot <- ggplot(data = filter(temp_berk, (year >= 1850 & year < 2021) & (iteration == 3)),
               aes(x = year, y = interpol, col = record, group = record)) +
  geom_line(lwd = 1) +
  geom_point(data = filter(temp_berk, (year == 2021 & iteration == 3)),
             aes(x = year, y = interpol, col = record, group = record),
             size = 4) +
  coord_cartesian(xlim = c(1850, 2021), ylim = c(0, 120117)) +
  theme_minimal(base_size = 13.5) +
  labs(y = 'Cumulative records',
       x = 'Year') +
  # 'labels' spelled out (the original relied on partial matching of 'label').
  scale_color_manual(name = 'Record:',
                     labels = c('High', 'Low'),
                     values = c('#FF7B7B', '#2A9DF4')) +
  labs(caption = 'Note: Record events have to either exceed the value of the 3rd IQR or be the highest value to date.
Source: Berkley Earth.
Credit: Visualization created by James Morrissey')
print(plot)
dev.off()
# Collect the rendered frame files. list.files() sorts alphabetically, which
# is also chronological here because the 'imageYYYY-I.png' names are
# fixed-width.
a2_image_file_names <- list.files('temp_images/', full.names = TRUE)

# Repeat the final static frame 12 times for the end-of-animation pause.
a2_image_file_names <- c(a2_image_file_names, rep('a2_last_image.png', 12))

# Read every frame into a magick object.
a2_img_list <- map(a2_image_file_names, image_read)

# Clean up the scratch directory and the pause frame now that all frames
# are in memory.
unlink('temp_images', recursive = TRUE)
unlink('a2_last_image.png')

# Join the frames into a single multi-frame magick image.
anim2_full_magic <- image_join(a2_img_list)
# Stack the two animations frame-by-frame (map on top, chart underneath).
# Deriving the frame count from the objects -- rather than hard-coding 525 --
# keeps this in sync if the year range or pause length changes, and building
# a list avoids the O(n^2) cost of growing the magick vector one frame at a
# time in a loop.
n_frames <- length(anim1_full_magic)
stopifnot(n_frames == length(anim2_full_magic)) # the two tracks must align 1:1
anim1_2_full_magic <- image_join(
  map(seq_len(n_frames),
      function(i) image_append(c(anim1_full_magic[i], anim2_full_magic[i]), stack = TRUE))
)

# Write the combined gif via gifski (image_write_gif is much faster than
# magick's own writer). The delay pattern holds the middle frame of each
# year-triplet longer: 0.2 s, 0.5 s, 0.2 s per year.
image_write_gif(anim1_2_full_magic, path = 'anim_full.gif',
                delay = rep(c(0.2, 0.5, 0.2), n_frames / 3))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment