#Heat record visualization
#The code below supports the analysis behind this blog post: [insert html]
#Note this only pertains to the animated visualization, not the bar chart in that post
library('ncdf4') # package for netcdf manipulation
library('ggplot2') # package for plotting
library('tidyverse') #for wrangling
library('lubridate') #for dates
library('gganimate') #for animation
library('magick') #for animation
library('zoo') #for NA interpolation (na.approx)
library('purrr') #to map functions
#Download the files for high and low temps (equal-area grid) from here: http://berkeleyearth.org/data/
download.file('http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TMAX_EqualArea.nc', 'berk_tmax_equal_area.nc', mode = 'wb')
download.file('http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TMIN_EqualArea.nc', 'berk_tmin_equal_area.nc', mode = 'wb')
#Import the Berkeley Earth data - from here: http://berkeleyearth.org/data/
tmax_nc_berk <- nc_open('berk_tmax_equal_area.nc')
tmin_nc_berk <- nc_open('berk_tmin_equal_area.nc')
#Looking at the nc_berk data, we have:
#A two-dimensional array of temperature anomalies - one value per location (lon/lat grid cell) per time step
#The plan is to extract the array for every time period, match it to the lat and lon data, and stack it all in a single df
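#A quick, optional way to confirm that structure before extracting anything (not part of the original pipeline; it just prints summaries):
print(tmax_nc_berk) #lists the dimensions and variables stored in the file
print(dim(ncvar_get(tmax_nc_berk, 'temperature'))) #should return c(number of grid cells, number of time steps)
print(length(ncvar_get(tmax_nc_berk, 'longitude'))) #one longitude (and latitude) value per equal-area grid cell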
#First get the array
temp_anom_array <- ncvar_get(tmax_nc_berk, 'temperature')
#Get the indices of the time dimension of the array - time is dimension 2 in this case
time_layers <- 1:dim(temp_anom_array)[2]
#Then write a function that creates a df for a stated time value
#The function takes an open netCDF connection (passed as df) and an index into the time dimension, and produces a df with the date and temperature anomaly at that index for every lat/lon combination
layer_extract <- function(df, x) {
output <- data.frame(date = ncvar_get(df, 'time')[x], #grab the indexed time var
lon = ncvar_get(df, 'longitude'), #grab the lat and lon
lat = ncvar_get(df, 'latitude'),
temp_anom = ncvar_get(df, 'temperature')[,x]) #grab the indexed temperature var
#Clean the output
output <- output %>%
separate(date, into = c('year', 'day'), sep = '\\.') %>%
mutate(day = as.numeric(paste0('0.', day))) %>%
mutate(days = ifelse(leap_year(as.numeric(year)), 366, 365)) %>%
mutate(day = round(day * days, 0)) %>%
mutate(date = as.Date(day, format = '%j', origin = as.Date(paste0(year, '-01-01')))) %>%
select(date, lon, lat, temp_anom)
print(paste(x, 'of', max(time_layers), 'done'))
return(output)
}
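#Optional sanity check before mapping over every layer: run the function on a single time index and eyeball the output (test_layer is just a throwaway name; this step is not required for the pipeline)
test_layer <- layer_extract(tmax_nc_berk, 1)
print(head(test_layer)) #expect one row per grid cell, with columns date, lon, lat and temp_anom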
#iterate over all the time values in a dataframe to create a list of df's with each list holding a df with temp values for that time linked to lon and lat
#NOTE this will take a while to run as it creates 2054 (the value of dim(temp_anom_array)[2]) data frames of 5024 rows each - approx 10 million values
list_of_grid_tmax_data <- map(time_layers, layer_extract, df = tmax_nc_berk)
list_of_grid_tmin_data <- map(time_layers, layer_extract, df = tmin_nc_berk)
#bind the lists into a single df
berk_raw_tmax <- do.call('rbind', list_of_grid_tmax_data)
berk_raw_tmin <- do.call('rbind', list_of_grid_tmin_data)
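#For what it's worth, dplyr::bind_rows() does the same row-binding and is usually faster than do.call('rbind', ...) on long lists - an optional alternative, not what was run here:
#berk_raw_tmax <- bind_rows(list_of_grid_tmax_data)
#berk_raw_tmin <- bind_rows(list_of_grid_tmin_data)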
#Join and clean the two data frames
berk_raw_full <- berk_raw_tmax %>%
rename(tmax = temp_anom) %>%
full_join(berk_raw_tmin, by = c('date', 'lat', 'lon')) %>%
rename(tmin = temp_anom)
#Create the dataframe of baseline quartiles
#Do this so that I have a starting record for each area - the starting high (low) record is assumed to be the 75th (25th) percentile over the anomaly baseline period, 1951-1980
berk_annual_iqrs <- berk_raw_full %>%
filter(!(is.na(tmax) | is.na(tmin))) %>% #get rid of NA to make processing easier
filter(date > as.Date('1951-01-01') & date < as.Date('1980-12-31')) %>% #set the period for IQR
mutate(year = as.integer(year(date)), #correct the data format
lat_lon_pair = paste(lat, lon, sep = '/')) %>% #create a single key by which I can group observations
group_by(year, lat_lon_pair) %>% #group the data
summarize(tmax = mean(tmax, na.rm = T), #calculate annual means so that I can visualize the data annually - otherwise there would be too many iterations to visualize
tmin = mean(tmin, na.rm = T)) %>%
ungroup() %>%
group_by(lat_lon_pair) %>%
summarize(tmax_upper_q = quantile(tmax, na.rm = T)[4], #pull the 4th element of the quantile output - the 75th-percentile cutoff for high temps
tmin_lower_q = quantile(tmin, na.rm = T)[2]) #pull the 2nd element - the 25th-percentile cutoff for low temps
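#For reference, quantile() with its default probs returns the five-number summary (0/25/50/75/100%), which is why element 4 is the 75th percentile and element 2 the 25th; a tiny illustration with made-up values:
print(quantile(c(1, 2, 3, 4, 5)))
#  0%  25%  50%  75% 100%
#   1    2    3    4    5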
#Identify all the instances in which records are broken and the anomalies at which they are broken.
berk_records <- berk_raw_full %>%
filter(!(is.na(tmax) | is.na(tmin))) %>% #get rid of NA to make processing easier
mutate(year = as.integer(year(date)), #clean data types
lat_lon_pair = paste(lat, lon, sep = '/')) %>% #create single object for grouping
group_by(year, lat_lon_pair) %>%
summarize(tmax = mean(tmax, na.rm = T), #calculate the annual mean temp high and low anomaly for each year for each location
tmin = mean(tmin, na.rm = T)) %>%
ungroup() %>%
arrange(year) %>%
group_by(lat_lon_pair) %>%
mutate(cumulative_record_h = cummax(tmax), #calculate the cumulative maximum temp and cumulative minimum temp for each location
cumulative_record_l = cummin(tmin)) %>%
left_join(berk_annual_iqrs, by = 'lat_lon_pair') %>% #join the quartile data so that I can check records against either the cumulative max/min or the baseline quartile
mutate(record_h = ifelse(tmax_upper_q > cumulative_record_h, tmax_upper_q, cumulative_record_h), #the record to beat is whichever is higher (lower) of the baseline quartile and the cumulative max (min)
record_l = ifelse(tmin_lower_q < cumulative_record_l, tmin_lower_q, cumulative_record_l)) %>%
mutate(record_break_h = ifelse(tmax >= record_h, T, F), #identify instances where records occur
record_break_l = ifelse(tmin <= record_l, T, F)) %>%
select(year, lat_lon_pair, tmin, tmax, record_break_h, record_break_l) %>% #clean the data
separate(lat_lon_pair, into = c('lat', 'lon'), sep = '/') %>%
mutate_at(.vars = vars(lat, lon), .funs = as.numeric) %>%
ungroup() %>%
pivot_longer(cols = c(record_break_h, record_break_l), names_to = 'record') %>%
filter(value == T) %>%
mutate(recordtemp = ifelse(record == 'record_break_l', tmin, tmax))
#transform the data so that it can be visualized effectively
#Essentially I have to add iterations to the data, which make the dots shrink and fade within each year
berk_record_final <- berk_records %>%
mutate(recordtemp = ifelse(record == 'record_break_l' & recordtemp < -5, -5, #first squash the temperature records so that anything above 5 or below -5 is capped at +/-5; this stops extremes from pushing the rest of the color scale toward the middle
ifelse(record == 'record_break_h' & recordtemp > 5, 5, recordtemp))) %>%
mutate(alpha_1 = 1, alpha_2 = 0.5, alpha_3 = 0.1) %>% #then create three columns holding the alpha values for graphing
pivot_longer(grep('_', colnames(.)), names_to = 'iteration', values_to = 'alpha') %>% #pivot those into longer format so there are 3 alpha values for each year for each location
mutate(iteration = as.numeric(gsub('[a-z]+_', '0.', iteration))) %>% #convert alpha_1/2/3 into numeric offsets 0.1/0.2/0.3
mutate(iteration = as.double(year) + iteration) %>%
mutate(size = ifelse(alpha == 1, 1, #create size values for each alpha - i.e. dots decrease in size and increase in transparency
ifelse(alpha == 0.5, 0.5, 0.1))) %>%
arrange(iteration)
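#Each (location, year, record) row now appears three times, at iteration = year + 0.1/0.2/0.3, with shrinking size and alpha; an optional peek at that structure:
print(head(select(berk_record_final, year, iteration, alpha, size)))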
#get the world map for graphing
world <- map_data('world')
#Get the number of frames for animating the viz - three frames (iterations) per year
nframes <- length(unique(berk_record_final$year)) * 3
#Create the first animation
a1 <- ggplot() +
geom_polygon(data = world, #plot the world on an empty plot
aes(x = long, y = lat, group = group),
fill = NA, col = 'grey') +
geom_point(data = berk_record_final, #plot the points for the records
aes(x = lon, y = lat, col = recordtemp, size = size, alpha = alpha)) +
scale_color_gradient2(low = 'blue', mid = 'white', high = 'red', #create the color gradient
limits = c(-5, 5), midpoint = 0,
labels = c('< -5', '-2.5', '0', '2.5', '> 5'),
name = expression(atop('Record Temp', 'Anomaly (' ^o * 'C):'))) +
theme_minimal(base_size = 13.5) +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank()) +
guides(size = 'none',
alpha = 'none') +
labs(x = element_blank(),
y = element_blank(),
title = 'Moments when record high and low temperatures were set at each location',
subtitle = 'Date: {1850 + frame %/% 3}') +
transition_manual(iteration) #transition manually between frames with no tweening or other modifications - which is why I had to make all the iterations - note I couldn't get fade in and out to work
#create the animation with gganimate
#height and width are in px
#fps doesn't matter here as I will remake the animation later with a stated fps/delay under magick
anim1 <- animate(a1, nframes = nframes, fps = 6, height = 500, width = 600)
#Because I want to stitch the two visualizations together, I convert to a magick image object, which can be appended frame by frame
anim1_magic <- image_read(anim1)
#I want the visualization to pause on the last frame, so I repeat it 12 times (i.e. 4 seconds at the roughly 3 fps set later)
#first create the viz - writing the image to file
png(filename = 'a1_last_image.png',
height = 500, width = 600, unit = 'px')
plot <- ggplot() +
geom_polygon(data = world,
aes(x = long, y = lat, group = group),
fill = NA, col = 'grey') +
geom_point(data = filter(berk_record_final, year == 2021),
aes(x = lon, y = lat, col = recordtemp, size = size, alpha = alpha)) +
scale_color_gradient2(low = 'blue', mid = 'white', high = 'red',
limits = c(-5, 5), midpoint = 0,
labels = c('< -5', '-2.5', '0', '2.5', '> 5'),
name = expression(atop('Record Temp', 'Anomaly (' ^o * 'C):'))) +
theme_minimal(base_size = 13.5) +
theme(axis.text.x = element_blank(),
axis.text.y = element_blank()) +
guides(size = 'none',
alpha = 'none') +
labs(x = element_blank(),
y = element_blank(),
title = 'Moments when record high and low temperatures were set at each location',
subtitle = 'Date: 2021')
print(plot)
dev.off()
#Then read the image as an image magick object
a1_last_img_magic <- image_read('a1_last_image.png')
#Clean up - delete the temporary png file
unlink('a1_last_image.png')
#Then append the last image (repeated 12 times) to the animation converted above
#Now I have the full animation for the first visualization as a single magick object (a vector of frames)
anim1_full_magic <- c(anim1_magic, rep(a1_last_img_magic, 12))
#Move on to the second visualization - the cumulative records
#Note I couldn't get this to work by creating a gganimation and then turning it into a magick object as above
#For some reason the magick object would have fewer frames - see here: https://stackoverflow.com/questions/68853820/image-readgif-produces-a-different-number-of-frames-to-original-gif
#So instead I will loop over the data frame, creating the required images for the visualization.
#Then convert each to a magick object and append
#Note I want to tween the data between points, so I have to create the interpolated data myself
#First build a scaffold data frame of every year/record/iteration combination, to interpolate over
temp_berk_inter <- data.frame(year = 1850:2021) %>% #create empty dataframe of year data
arrange(year) %>%
mutate('record_break_h', 'record_break_l') %>% #create one column per record category (the unnamed strings become columns holding those values)
pivot_longer(-year, values_to = 'record') %>% #pivot longer
select(-name) %>%
mutate(1, 2, 3) %>% #create three dummy columns (one per within-year iteration) to pivot into an iteration index
pivot_longer(-c(year, record), names_to = 'iteration') %>% #pivot and clean
select(-value)
#create a data frame with two records for each year - I am sure this could be built more effectively (see the alternative sketched after this block)
complete_record <- data.frame(year = rep(1850:2021, 2),
record = c(rep('record_break_h', 2022-1850), rep('record_break_l', 2022-1850))) %>%
arrange(year)
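#An optional, more compact way to build both scaffolds above with tidyr::expand_grid() - sketched here as an alternative, not what was actually run:
#temp_berk_inter <- expand_grid(year = 1850:2021,
#                               record = c('record_break_h', 'record_break_l'),
#                               iteration = as.character(1:3))
#complete_record <- expand_grid(year = 1850:2021,
#                               record = c('record_break_h', 'record_break_l'))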
#create the final full data frame
temp_berk <- berk_record_final %>%
ungroup() %>%
group_by(year, record) %>%
summarize(n = n()) %>% #take the records-only data and count the number of record rows in each year, by record type
full_join(complete_record, by = c('year', 'record')) %>% #then join to the complete year/record grid so that every year appears for both the high and low series
arrange(year) %>%
replace_na(list(n = 0)) %>% #set the count to zero for years in which no records were set (those years were previously missing)
ungroup() %>%
group_by(record) %>%
mutate(cum_n = cumsum(n)) %>% #now take the cumulative sum so that I have the running record count over the complete set of years
ungroup() %>%
mutate(iteration = '3') %>% #Effectively I want to tween over three iterations a year (to match the first animation), so I label each observed value as iteration 3 - the point where I want it to sit at the end of the year
right_join(temp_berk_inter, by = c('year', 'record', 'iteration')) %>% #then join with the iteration data frame - so I have three iterations for high and low records
arrange(year, record, iteration) %>%
mutate(cum_n = ifelse(year == 1850 & iteration == '1', 0, cum_n)) %>% #Have to create the first value - otherwise I cannot interpolate
group_by(record) %>% #group by records and interpolate between them
mutate(interpol = na.approx(cum_n)) %>% #interpolate (from zoo)
ungroup() %>%
mutate(n = 1:nrow(.)) #Add a row index for iteration purposes
#create a directory called temp_images in which to store the images I will create for the animation
dir.create('temp_images')
#Now loop over the data frame and create the images
#This dumps files into the folder called 'temp_images'
for (y in 1851:2021) { #for every year
for (i in 1:3) { #and every iteration
#write the following plot
png(filename = paste0('temp_images/image', y, '-', i, '.png'),
width = 600, height = 200, unit = 'px')
plot <- ggplot(data = filter(temp_berk, (year >= 1850 & year < y) & (iteration == i)),
aes(x = year, y = interpol, col = record, group = record)) +
geom_line(lwd = 1) +
geom_point(data = filter(temp_berk, (year == y & iteration == i)),
aes(x = year, y = interpol, col = record, group = record),
size = 4) +
coord_cartesian(xlim = c(1850, 2021), ylim = c(0, 120117)) +
theme_minimal(base_size = 13.5) +
labs(y = 'Cumulative records',
x = 'Year') +
scale_color_manual(name = 'Record:',
labels = c('High', 'Low'),
values = c('#FF7B7B', '#2A9DF4')) +
labs(caption = 'Note: Record events have to either exceed the baseline 3rd-quartile value or be the highest value to date.
Source: Berkeley Earth
Credit: Visualization created by James Morrissey')
print(plot)
dev.off()
}
}
#Again, because I want the viz to pause on the last image, I create a version of the last image
png(filename = 'a2_last_image.png',
width = 600, height = 200, unit = 'px')
plot <- ggplot(data = filter(temp_berk, (year >= 1850 & year < 2021) & (iteration == 3)),
aes(x = year, y = interpol, col = record, group = record)) +
geom_line(lwd = 1) +
geom_point(data = filter(temp_berk, (year == 2021 & iteration == 3)),
aes(x = year, y = interpol, col = record, group = record),
size = 4) +
coord_cartesian(xlim = c(1850, 2021), ylim = c(0, 120117)) +
theme_minimal(base_size = 13.5) +
labs(y = 'Cumulative records',
x = 'Year') +
scale_color_manual(name = 'Record:',
labels = c('High', 'Low'),
values = c('#FF7B7B', '#2A9DF4')) +
labs(caption = 'Note: Record events have to either exceed the baseline 3rd-quartile value or be the highest value to date.
Source: Berkeley Earth.
Credit: Visualization created by James Morrissey')
print(plot)
dev.off()
#Then I get the list of file names in the temp folder
a2_image_file_names <- list.files('temp_images/', full.names = T)
#I add the last image 12 times
a2_image_file_names <- c(a2_image_file_names, rep('a2_last_image.png', 12))
#I then convert all the images into magick image objects
a2_img_list <- map(a2_image_file_names, image_read)
#clean up - delete all the images I just created, the temp folder, and a2_last_image.png
unlink('temp_images', recursive = T)
unlink('a2_last_image.png')
#Then join all the image magick objects into a single image magick object
anim2_full_magic <- image_join(a2_img_list)
#Now we have the two full animations as image magick objects
#Now stick the two objects together: iterate frame-by-frame over the two magick objects and append each pair of frames vertically
anim1_2_full_magic <- image_append(c(anim1_full_magic[1], anim2_full_magic[1]), stack = T)
for(i in 2:525) {
combined <- image_append(c(anim1_full_magic[i], anim2_full_magic[i]), stack = T)
anim1_2_full_magic <- c(anim1_2_full_magic, combined)
}
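#The hard-coded 525 above is the frame count of the second animation (171 years x 3 iterations + 12 paused frames); if in doubt it can be derived from the objects themselves rather than hard-coded - an optional tweak:
#n_frames <- min(length(anim1_full_magic), length(anim2_full_magic))
#the loop would then run over 2:n_frames, and the gif delay below would be rep(c(0.2, 0.5, 0.2), n_frames / 3)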
#Now write as a gif - use the gifski renderer (image_write_gif) as this is much faster
image_write_gif(anim1_2_full_magic, path = 'anim_full.gif', delay = rep(c(0.2, 0.5, 0.2), 525/3))