-
-
Save morrisseyj/3f88db1afb4bc2562d3648a523558d0f to your computer and use it in GitHub Desktop.
Heat record visualization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The code below supports the analysis informing the accompanying blog post.
# Note: this pertains only to the animated visualization, not the bar chart on that blog.
library('ncdf4') # NetCDF reading/manipulation -- the Berkeley Earth grids ship as .nc files
library('ggplot2') # plotting
library('tidyverse') # data wrangling (dplyr, tidyr, stringr, ...)
library('lubridate') # date handling (leap_year(), year())
library('gganimate') # frame-based animation of ggplots
library('magick') # image manipulation -- used to stitch the two animations together
library('zoo') # na.approx() for linear interpolation over NA runs
library('purrr') # map() to iterate the extraction function over time layers
# Download the Berkeley Earth gridded (equal-area) high/low temperature
# anomaly files. Source index: http://berkeleyearth.org/data/
# mode = 'wb' is required so the binary NetCDF files are not corrupted on Windows.
download.file('http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TMAX_EqualArea.nc',
              'berk_tmax_equal_area.nc', mode = 'wb')
download.file('http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TMIN_EqualArea.nc',
              'berk_tmin_equal_area.nc', mode = 'wb')

# Open connections to the two NetCDF files (data is not read into memory yet).
tmax_nc_berk <- nc_open('berk_tmax_equal_area.nc')
tmin_nc_berk <- nc_open('berk_tmin_equal_area.nc')
# Structure of the NetCDF data: a 2-D temperature-anomaly array
# (grid cell x time) plus matching longitude/latitude vectors.
# Plan: extract the slice for every time step, attach lon/lat, and stack
# everything into a single data frame.

# Pull the full anomaly array (dimension 1 = grid cell, dimension 2 = time).
temp_anom_array <- ncvar_get(tmax_nc_berk, 'temperature')

# Index sequence over the time dimension (dimension 2).
# seq_len() is safe even for a zero-length dimension, where 1:n would yield c(1, 0).
time_layers <- seq_len(dim(temp_anom_array)[2])
#' Extract one time slice of a Berkeley Earth NetCDF file as a data frame.
#'
#' @param df An open ncdf4 connection (result of nc_open()).
#'   NOTE(review): the name 'df' is kept for backward compatibility with the
#'   callers below, but this is an ncdf4 object, not a data frame.
#' @param x  Integer index into the time dimension of the 'temperature' array.
#' @return A data frame with columns date, lon, lat, temp_anom covering every
#'   grid cell at time index x.
layer_extract <- function(df, x) {
  output <- data.frame(date = ncvar_get(df, 'time')[x],               # decimal-year time stamp
                       lon = ncvar_get(df, 'longitude'),              # per-cell coordinates
                       lat = ncvar_get(df, 'latitude'),
                       temp_anom = ncvar_get(df, 'temperature')[, x]) # anomaly slice at index x
  # Convert the decimal-year stamp (e.g. 1850.041) into a proper Date:
  # split into year and fraction, scale the fraction by the year length,
  # and offset from Jan 1 of that year.
  output <- output %>%
    separate(date, into = c('year', 'day'), sep = '\\.') %>%
    mutate(day = as.numeric(paste0('0.', day))) %>%                   # fraction of year elapsed
    mutate(days = ifelse(leap_year(as.numeric(year)), 366, 365)) %>%  # year length in days
    mutate(day = round(day * days, 0)) %>%                            # day-of-year (rounded)
    # NOTE(review): as.Date() on a numeric offsets from `origin` (the format
    # argument is ignored for numeric input), so this gives Jan 1 + day days
    # and may be off by one day -- immaterial for the annual aggregation below.
    mutate(date = as.Date(day, format = '%j', origin = as.Date(paste0(year, '-01-01')))) %>%
    select(date, lon, lat, temp_anom)
  # Progress indicator -- the full extraction iterates over ~2,000 layers.
  print(paste(x, 'of', max(time_layers), 'done'))
  return(output)
}
# Iterate over every time index, producing one data frame per time slice
# (each holding anomalies for all ~5,000 grid cells).
# NOTE: slow -- roughly 2,054 slices x 5,024 cells, about 10 million values.
list_of_grid_tmax_data <- map(time_layers, layer_extract, df = tmax_nc_berk)
list_of_grid_tmin_data <- map(time_layers, layer_extract, df = tmin_nc_berk)

# Stack the per-slice data frames into one long data frame each.
# bind_rows() yields the same table as do.call('rbind', ...) for
# identically-shaped data frames but avoids the quadratic copying cost.
berk_raw_tmax <- bind_rows(list_of_grid_tmax_data)
berk_raw_tmin <- bind_rows(list_of_grid_tmin_data)
# Join the high- and low-temperature tables into one row per (date, lat, lon)
# with tmax and tmin columns. full_join keeps cells present in only one file
# (the missing side becomes NA and is filtered later).
berk_raw_full <- berk_raw_tmax %>%
  rename(tmax = temp_anom) %>%
  full_join(berk_raw_tmin, by = c('date', 'lat', 'lon')) %>%
  rename(tmin = temp_anom)
# Baseline quantiles per grid cell, used as the "starting record" each
# location must beat: the 75th percentile of annual mean highs (and the 25th
# percentile of annual mean lows) over the 1951-1980 reference period.
berk_annual_iqrs <- berk_raw_full %>%
  filter(!(is.na(tmax) | is.na(tmin))) %>% # drop incomplete cells up front
  # NOTE(review): the strict inequalities exclude 1951-01-01 and 1980-12-31
  # themselves; kept as-is to preserve the published numbers.
  filter(date > as.Date('1951-01-01') & date < as.Date('1980-12-31')) %>%
  mutate(year = as.integer(year(date)),
         lat_lon_pair = paste(lat, lon, sep = '/')) %>% # single grouping key per cell
  group_by(year, lat_lon_pair) %>%
  # Annual means per cell -- monthly resolution would give far too many frames.
  summarize(tmax = mean(tmax, na.rm = TRUE),
            tmin = mean(tmin, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(lat_lon_pair) %>%
  # quantile() with defaults returns c(0%, 25%, 50%, 75%, 100%):
  # element 4 is the 75th percentile, element 2 the 25th.
  summarize(tmax_upper_q = quantile(tmax, na.rm = TRUE)[4],
            tmin_lower_q = quantile(tmin, na.rm = TRUE)[2])
# Identify every (year, cell) in which a temperature record is set, together
# with the anomaly at which it was broken. A "record" year must reach either
# the cell's running max/min to date or its 1951-1980 baseline quantile,
# whichever is more extreme.
berk_records <- berk_raw_full %>%
  filter(!(is.na(tmax) | is.na(tmin))) %>%              # drop incomplete cells
  mutate(year = as.integer(year(date)),
         lat_lon_pair = paste(lat, lon, sep = '/')) %>% # single grouping key per cell
  group_by(year, lat_lon_pair) %>%
  # Annual mean high/low anomaly per cell per year.
  summarize(tmax = mean(tmax, na.rm = TRUE),
            tmin = mean(tmin, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(year) %>%                                     # cummax/cummin require time order
  group_by(lat_lon_pair) %>%
  mutate(cumulative_record_h = cummax(tmax),            # running extremes per cell
         cumulative_record_l = cummin(tmin)) %>%
  left_join(berk_annual_iqrs, by = 'lat_lon_pair') %>%  # attach the baseline quantiles
  # The effective record is the stricter of baseline quantile vs running extreme.
  mutate(record_h = ifelse(tmax_upper_q > cumulative_record_h, tmax_upper_q, cumulative_record_h),
         record_l = ifelse(tmin_lower_q < cumulative_record_l, tmin_lower_q, cumulative_record_l)) %>%
  # A record is broken when the year's mean meets or exceeds the record level
  # (the comparison is already logical -- no ifelse(cond, T, F) needed).
  mutate(record_break_h = tmax >= record_h,
         record_break_l = tmin <= record_l) %>%
  select(year, lat_lon_pair, tmin, tmax, record_break_h, record_break_l) %>%
  separate(lat_lon_pair, into = c('lat', 'lon'), sep = '/') %>%
  mutate(across(c(lat, lon), as.numeric)) %>%           # across() supersedes mutate_at()
  ungroup() %>%
  # One row per record event; 'record' names the kind, 'recordtemp' its anomaly.
  pivot_longer(cols = c(record_break_h, record_break_l), names_to = 'record') %>%
  filter(value == TRUE) %>%
  mutate(recordtemp = ifelse(record == 'record_break_l', tmin, tmax))
# Reshape for animation: each record event gets three "iterations" per year
# with decreasing alpha/size, so the dots shrink and fade over the frames
# that follow their appearance.
berk_record_final <- berk_records %>%
  # Clamp anomalies to [-5, 5] so a few extreme values don't compress the
  # color scale toward the middle (legend labels show '< -5' / '> 5').
  mutate(recordtemp = ifelse(record == 'record_break_l' & recordtemp < -5, -5,
                      ifelse(record == 'record_break_h' & recordtemp > 5, 5, recordtemp))) %>%
  # Three alpha levels per event, one per sub-year frame.
  mutate(alpha_1 = 1, alpha_2 = 0.5, alpha_3 = 0.1) %>%
  # starts_with('alpha_') selects exactly the three columns created above;
  # the original grep('_', colnames(.)) would silently capture any other
  # underscore-named column added upstream.
  pivot_longer(starts_with('alpha_'), names_to = 'iteration', values_to = 'alpha') %>%
  # 'alpha_1' -> 0.1, 'alpha_2' -> 0.2, 'alpha_3' -> 0.3: fractional frame offsets.
  mutate(iteration = as.numeric(gsub('[a-z]+_', '0.', iteration))) %>%
  mutate(iteration = as.double(year) + iteration) %>% # e.g. 1950.1, 1950.2, 1950.3
  # Dot size tracks transparency so fading dots also shrink.
  mutate(size = ifelse(alpha == 1, 1,
                ifelse(alpha == 0.5, 0.5, 0.1))) %>%
  arrange(iteration)
# World polygon outlines for the base map (backed by the 'maps' package).
# Namespaced explicitly because purrr (loaded above) also exports map-related
# names, making bare map_data() easy to confuse.
world <- ggplot2::map_data('world')

# Total animation frames: three sub-year frames per year of data.
# (<- rather than = for top-level assignment, per house style.)
nframes <- length(unique(berk_record_final$year)) * 3
# First animation: world map with record-break dots appearing, then
# shrinking/fading over the three per-year iterations.
a1 <- ggplot() +
  # Country outlines on an otherwise empty plot.
  geom_polygon(data = world,
               aes(x = long, y = lat, group = group),
               fill = NA, col = 'grey') +
  # One point per record event; color = anomaly, size/alpha = fade stage.
  geom_point(data = berk_record_final,
             aes(x = lon, y = lat, col = recordtemp, size = size, alpha = alpha)) +
  # Diverging blue-white-red scale, clamped to [-5, 5] to match the clamped data.
  scale_color_gradient2(low = 'blue', mid = 'white', high = 'red',
                        limits = c(-5, 5), midpoint = 0,
                        labels = c('< -5', '-2.5', '0', '2.5', '> 5'),
                        name = expression(atop('Record Temp', 'Anomaly (' ^o * 'C):'))) +
  theme_minimal(base_size = 13.5) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank()) +
  guides(size = 'none',
         alpha = 'none') +
  labs(x = element_blank(),
       y = element_blank(),
       title = 'Moments when record high and low temperatures were set at the specific location',
       # Three frames per year starting at 1850, so integer-divide the frame
       # counter by 3 to recover the calendar year for the subtitle.
       subtitle = 'Date: {1850 + frame %/% 3}') +
  # transition_manual(): one frame per distinct 'iteration' value, with no
  # tweening -- the per-year iterations above fake the fade-out instead
  # (gganimate's enter/exit fades did not cooperate here).
  transition_manual(iteration)
# Render the animation. height/width are in pixels; the fps here is a
# placeholder -- final frame timing is set when the gif is rewritten with
# magick/gifski at the end of the script.
anim1 <- animate(a1, nframes = nframes, fps = 6, height = 500, width = 600)

# Convert to a magick image object so frames can later be appended
# one-by-one to the second visualization.
anim1_magic <- image_read(anim1)
# Pause the animation on its final frame: render the last (2021) frame as a
# static png, then append 12 copies of it (4 s at the eventual 3 fps).
# NOTE(review): this plot intentionally duplicates the styling of 'a1' above;
# keep the two in sync if either changes.
png(filename = 'a1_last_image.png',
    height = 500, width = 600, units = 'px') # 'units' spelled out (was partial-matched 'unit')
plot <- ggplot() +
  geom_polygon(data = world,
               aes(x = long, y = lat, group = group),
               fill = NA, col = 'grey') +
  geom_point(data = filter(berk_record_final, year == 2021),
             aes(x = lon, y = lat, col = recordtemp, size = size, alpha = alpha)) +
  scale_color_gradient2(low = 'blue', mid = 'white', high = 'red',
                        limits = c(-5, 5), midpoint = 0,
                        labels = c('< -5', '-2.5', '0', '2.5', '> 5'),
                        name = expression(atop('Record Temp', 'Anomaly (' ^o * 'C):'))) +
  theme_minimal(base_size = 13.5) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank()) +
  guides(size = 'none',
         alpha = 'none') +
  labs(x = element_blank(),
       y = element_blank(),
       title = 'Moments when record high and low temperatures were set at the specific location',
       subtitle = 'Date: 2021')
print(plot)
dev.off()

# Read the frame back in as a magick object, then delete the temporary file.
a1_last_img_magic <- image_read('a1_last_image.png')
unlink('a1_last_image.png')

# Full first animation: rendered frames plus 12 repeats of the final frame.
anim1_full_magic <- c(anim1_magic, rep(a1_last_img_magic, 12))
# Second visualization: the cumulative-records line chart.
# gganimate -> magick conversion dropped frames for this plot
# (https://stackoverflow.com/questions/68853820), so frames are generated
# manually below; interpolation between yearly points is also done by hand.
#
# Scaffold: every combination of year (1850-2021), record type, and sub-year
# iteration. The original built this with unnamed mutate() columns and two
# pivots; crossing() produces the identical (year, record, iteration) grid
# directly. 'iteration' is deliberately character ('1'..'3') to match how it
# is tagged and joined in the next step.
temp_berk_inter <- crossing(year = 1850:2021,
                            record = c('record_break_h', 'record_break_l'),
                            iteration = as.character(1:3))
# Two rows per year (one high-record, one low-record) for 1850-2021, used to
# re-insert years in which no record was set at all.
complete_record <- data.frame(year = rep(1850:2021, 2),
                              record = c(rep('record_break_h', 2022 - 1850),
                                         rep('record_break_l', 2022 - 1850))) %>%
  arrange(year)
# Data behind the cumulative-records chart: yearly counts of record events,
# cumulated per record type, with two extra interpolated points per year so
# the line advances smoothly across the three frames each year.
temp_berk <- berk_record_final %>%
  ungroup() %>%
  group_by(year, record) %>%
  summarize(n = n()) %>%              # record events per year per type
  ungroup() %>%                       # drop residual 'year' grouping early
  # Re-insert the years where no record of a given type was set ...
  full_join(complete_record, by = c('year', 'record')) %>%
  arrange(year) %>%
  replace_na(list(n = 0)) %>%         # ... as explicit zero-count years
  group_by(record) %>%
  mutate(cum_n = cumsum(n)) %>%       # running total over the complete year sequence
  ungroup() %>%
  # Tag each observed value as iteration '3' (its year-end position), then
  # join against the full year x record x iteration scaffold so iterations
  # '1' and '2' exist as NA rows to interpolate over.
  mutate(iteration = '3') %>%
  right_join(temp_berk_inter, by = c('year', 'record', 'iteration')) %>%
  arrange(year, record, iteration) %>%
  # na.approx() needs a leading non-NA value, so anchor each series at zero.
  mutate(cum_n = ifelse(year == 1850 & iteration == '1', 0, cum_n)) %>%
  group_by(record) %>%
  mutate(interpol = na.approx(cum_n)) %>% # linear interpolation (zoo)
  ungroup() %>%
  mutate(n = row_number())            # row index for iteration bookkeeping
                                      # (deliberately overwrites the count column)
# Render one png per (year, iteration) frame for the cumulative-records chart
# into a scratch directory, to be read back and joined with magick below.
dir.create('temp_images')

for (y in 1851:2021) {   # one set of frames per year
  for (i in 1:3) {       # three sub-year iterations, matching animation 1
    png(filename = paste0('temp_images/image', y, '-', i, '.png'),
        width = 600, height = 200, units = 'px') # 'units' spelled out (was 'unit')
    # NOTE(review): 'iteration' is character and i is numeric; == coerces i
    # to character, so the comparison works as intended.
    plot <- ggplot(data = filter(temp_berk, (year >= 1850 & year < y) & (iteration == i)),
                   aes(x = year, y = interpol, col = record, group = record)) +
      geom_line(lwd = 1) +
      # Emphasize the current year's position with a large point.
      geom_point(data = filter(temp_berk, (year == y & iteration == i)),
                 aes(x = year, y = interpol, col = record, group = record),
                 size = 4) +
      # Fixed axes so every frame shares one scale; 120117 is the final
      # cumulative count, so the line never leaves the panel.
      coord_cartesian(xlim = c(1850, 2021), ylim = c(0, 120117)) +
      theme_minimal(base_size = 13.5) +
      labs(y = 'Cumulative records',
           x = 'Year') +
      # 'labels' spelled out (the original relied on partial matching of 'label').
      scale_color_manual(name = 'Record:',
                         labels = c('High', 'Low'),
                         values = c('#FF7B7B', '#2A9DF4')) +
      labs(caption = 'Note: Record events have to either exceed the value of the 3rd IQR or be the highest value to date.
Source: Berkley Earth
Credit: Visualization created by James Morrissey')
    print(plot)
    dev.off()
  }
}
# Static copy of the chart's final frame, appended 12 times below so the
# combined gif pauses on the finished chart (mirrors animation 1's pause).
png(filename = 'a2_last_image.png',
    width = 600, height = 200, units = 'px') # 'units' spelled out (was 'unit')
plot <- ggplot(data = filter(temp_berk, (year >= 1850 & year < 2021) & (iteration == 3)),
               aes(x = year, y = interpol, col = record, group = record)) +
  geom_line(lwd = 1) +
  geom_point(data = filter(temp_berk, (year == 2021 & iteration == 3)),
             aes(x = year, y = interpol, col = record, group = record),
             size = 4) +
  coord_cartesian(xlim = c(1850, 2021), ylim = c(0, 120117)) +
  theme_minimal(base_size = 13.5) +
  labs(y = 'Cumulative records',
       x = 'Year') +
  # 'labels' spelled out (the original relied on partial matching of 'label').
  scale_color_manual(name = 'Record:',
                     labels = c('High', 'Low'),
                     values = c('#FF7B7B', '#2A9DF4')) +
  labs(caption = 'Note: Record events have to either exceed the value of the 3rd IQR or be the highest value to date.
Source: Berkley Earth.
Credit: Visualization created by James Morrissey')
print(plot)
dev.off()
# Collect the rendered frame files. list.files() sorts alphabetically, which
# is also chronological here because the 'imageYYYY-I.png' names are
# fixed-width.
a2_image_file_names <- list.files('temp_images/', full.names = TRUE)

# Repeat the final static frame 12 times for the end-of-animation pause.
a2_image_file_names <- c(a2_image_file_names, rep('a2_last_image.png', 12))

# Read every frame into a magick object.
a2_img_list <- map(a2_image_file_names, image_read)

# Clean up the scratch directory and the pause frame now that all frames
# are in memory.
unlink('temp_images', recursive = TRUE)
unlink('a2_last_image.png')

# Join the frames into a single multi-frame magick image.
anim2_full_magic <- image_join(a2_img_list)
# Stack the two animations frame-by-frame (map on top, chart underneath).
# Deriving the frame count from the objects -- rather than hard-coding 525 --
# keeps this in sync if the year range or pause length changes, and building
# a list avoids the O(n^2) cost of growing the magick vector one frame at a
# time in a loop.
n_frames <- length(anim1_full_magic)
stopifnot(n_frames == length(anim2_full_magic)) # the two tracks must align 1:1
anim1_2_full_magic <- image_join(
  map(seq_len(n_frames),
      function(i) image_append(c(anim1_full_magic[i], anim2_full_magic[i]), stack = TRUE))
)

# Write the combined gif via gifski (image_write_gif is much faster than
# magick's own writer). The delay pattern holds the middle frame of each
# year-triplet longer: 0.2 s, 0.5 s, 0.2 s per year.
image_write_gif(anim1_2_full_magic, path = 'anim_full.gif',
                delay = rep(c(0.2, 0.5, 0.2), n_frames / 3))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment