TonyLadson/Temporal_patterns_blog.R

## Temporal_patterns_blog.R


# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT


# Packages ----------------------------------------------------------------

devtools::install_github("thomasp85/patchwork")

library(tidyverse)
library(stringr)
library(rlang)
library(ggmap)
library(patchwork)


# Load data for Figure 2 in blog and plot ------------------------------------------


list.files('../data')

pat <- read_csv(file = file.path('../data', "Areal_pattern_7200min_200km2_SS.csv" ), skip = 1 )
glimpse(pat)

max(pat)
#[1] 48.82

ann_text <- data.frame(time = 30, percentage = 35, lab = "ID = 6884", hjust = 'left',
                       pattern_no = factor('X7', levels = str_c('X', 1:10), ordered = TRUE ))

p <- pat %>%
  mutate(time = 1:nrow(pat)) %>%
  gather(-time, key = 'pattern_no', value = 'percentage') %>%
  mutate(pattern_no = factor(pattern_no, levels = str_c('X', 1:10), ordered = TRUE )) %>%
  ggplot(aes(time, percentage)) +
  geom_col(fill = 'blue', colour = 'black') +
  facet_wrap(~pattern_no) +
  geom_text(data = ann_text,aes(label = lab), size = 2) +
  scale_y_continuous(name = 'Percentage in each increment') +
  scale_x_continuous(name = '') +
  labs(title = 'Areal Temporal Patterns',
       subtitle = 'Southern Slopes, duration = 7200 min (120 hours), area = 200 sq-km, \n40 increments of 180 min')+
  theme_gray(base_size = 7)

p
ggsave(file.path('../figures', 'southern_slopes_6884.png'), plot = p, width = 4, height = 3)


# Download Areal pattern increment files and all states files -----------------------------------------------------

# There are 12 regions
# 1.  Rangelands West
# 2. Wet Tropics
# 3. Rangelands
# 4. Central Slopes
# 5. Monsoonal North
# 6. Murray Basin
# 7. East Coast North
# 8. East Coast South
# 9. Southern Slopes Mainland
# 10. Southern Slopes Tasmania
# 11. East flatlands
# 12. West flatlands

# Two files for each region
# An increments file - which has the temporal patterns
# An Allstats file which as information on the storms that were used to generate the temporal patterns.


# Get all the Areal Increment files
increment_files <- list.files('../data')
increment_files <- str_subset(increment_files, '(Areal).*(Increments)') # Extract files that contain 'Areal' and 'Increments'
length(increment_files) # 12, as expected
increment_files

# [1] "Areal_CS_Increments.csv"         "Areal_ECnorth_Increments.csv"    "Areal_ECsouth_Increments.csv"    "Areal_FLTeast_Increments.csv"
# [5] "Areal_FLTwest_Increments.csv"    "Areal_MB_Increments.csv"         "Areal_MN_Increments.csv"         "Areal_R_Increments.csv"
# [9] "Areal_Rwest_Increments.csv"      "Areal_SSmainland_Increments.csv" "Areal_SStas_Increments.csv"      "Areal_WT_Increments.csv"


# list the All Stats files

str_subset(list.files('../data'), 'AllStats') # Extract files that contain 'AllStats'

# [1] "Areal_CS_AllStats.csv"         "Areal_ECnorth_AllStats.csv"    "Areal_ECsouth_AllStats.csv"
# [4] "Areal_FLTeast_AllStats.csv"    "Areal_FLTwest_AllStats.csv"    "Areal_MB_AllStats.csv"
# [7] "Areal_MN_AllStats.csv"         "Areal_R_AllStats.csv"          "Areal_Rwest_AllStats.csv"
# [10] "Areal_SSmainland_AllStats.csv" "Areal_SStas_AllStats.csv"      "Areal_WT_AllStats.csv"

# link file names to region names
allstats_files <- set_names(c("Areal_CS_AllStats.csv",
                              "Areal_ECnorth_AllStats.csv",
                              "Areal_ECsouth_AllStats.csv",
                              "Areal_FLTeast_AllStats.csv",
                              "Areal_FLTwest_AllStats.csv",
                              "Areal_MB_AllStats.csv",
                              "Areal_MN_AllStats.csv",
                              "Areal_R_AllStats.csv",
                              "Areal_Rwest_AllStats.csv",
                              "Areal_SSmainland_AllStats.csv",
                              "Areal_SStas_AllStats.csv",
                              "Areal_WT_AllStats.csv"),
                            c('central_slopes',
                              'east_coast_north',
                              'east_coast_south',
                              'east_flatlands',
                              'west_flatlands',
                              'murray_basin',
                              'monsoonal_north',
                              'rangelands',
                              'rangelands_west',
                              'southern_slopes_mainland',
                              'southern_slopes_tas',
                              'wet_tropics'))


# Function to download an all stats file, convert to a dataframe and store the whole lot in a data frame

Get_storm_stats <- function(fname) {

  # Read in all the patterns
  # One pattern per line

  #fname <- "Areal_CS_AllStats.csv"
  storm_stats <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)


  # Split the strings at commas
  storm_stats <- str_split(storm_stats, ',')


  # Convert to data frame
  storm_stats <- as_data_frame(rlang::exec('rbind', !!!storm_stats))

  # name the columns
  names(storm_stats) <- c('eventID','region', 'region_source',
                          'duration', 'timestep', 'original_burst_depth_mm',
                          'area','aspect_ratio', 'rotation',
                          'burst_start_time', 'lat', 'lon')
  storm_stats


}

# Get stats for all events
all_regions_stats <- map_df(.x = allstats_files, .f = Get_storm_stats)

all_regions_stats

# Function to calcuate the NUI

Chisquare_p <- function(p) {
  p <- p[-(1:5)] # Remove the first 5 elements
  p <- as.numeric(p) # Convert to numeric
  p_e <- rep(100/length(p), times = length(p)) # Expected
  chi <- sum(((p - p_e)^2)/p_e) # o - e
  -log(-pchisq(chi, df = length(p) - 1, log.p = TRUE))
  # This transformation provides numbers that are in a range that are easy to work with
}


# Function to read in patterns, calculate NUI and return a data frame with information:
# eventID, duration_min, duration_hr, interval_min, region, NUI

# fname is the file name of the increments file

Find_bad_patterns <- function(fname) {

  # Read in all the patterns
  # One pattern per line
  pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)

  # Split the strings at commas
  pat <- str_split(pat, ',')

  # Calculate the Chisquare statistic for each pattern
  chisq_stat <- unlist(map(.x = pat, .f = Chisquare_p))


  # Function to grab the first 5 fields from a temporal pattern
  # The first 5 entres are:
  # 1. EventID
  # 2. Duration (minutes)
  # 3. Interval length (min)
  # 4. Region
  # 5. Area

  First_five <- function(p) {
    p <- p[1:5]
  }


  names(pat) <- 1:length(pat) # pat needs to have names for map_df to work
  pat_db <- map_df(.x = pat, .f= First_five) # get first 5 fields
  pat_db <- as_data_frame(t(as.matrix(pat_db))) # need to transpose to convert to data frame
  names(pat_db) <- c('eventID', 'duration_min', 'interval_min', 'region', 'area')


  # Add the non_uniformity index value NUI

  pat_db <- pat_db %>%
    mutate(NUI = chisq_stat) %>%
    mutate(duration_min = as.integer(duration_min)) %>%
    mutate(row_n = 1:length(pat)) %>%
    mutate(duration_hr = as.integer(duration_min/60)) %>%
    select(row_n, eventID, duration_min, duration_hr, interval_min, region, NUI)


  # return the data for each region

  pat_db %>%
    arrange(desc(NUI))

}


# Process all increment files to creat a summary for all patterns
all_patterns_sum <- map_df(.x = increment_files, .f = Find_bad_patterns )
all_patterns_sum <- all_patterns_sum %>%  arrange(desc(NUI))

#Attach storm data to pattern summary


all_patterns_sum <- all_patterns_sum %>%
  left_join(all_regions_stats, by = c("region" = "region", "eventID" = "eventID")) %>%
  arrange(desc(NUI))

#all_patterns_sum %>% View
all_patterns_sum %>% print(n = 21)

# Map locations ----------------------------------------------------------


# Plot a graph of locations
#

# First n locations

my_rows = 1:16

storm_map_df <- all_patterns_sum %>%
  slice(my_rows) %>%
  mutate_at(.vars = c('lat','lon'), .funs = as.numeric) %>%
  add_column(num = my_rows)


# You need an API key for this to work see ?register_google

# Calculating the scale

# Source function that calculates distance (UTM) from degrees
source("https://gist.githubusercontent.com/TonyLadson/f37aab3e2ef517188a7f27166307c985/raw/0822970769bc90fcc28052a91b375399d665286e/UTM2deg.R")

# These eastings and northings are from the Jerangle area where the least uniform
# patterns occur
UTM2deg(easting = 712748.88, northing = 6021016.41, zone = 55, format_out = 'deg' )
# lat      lon
# 1 -35.93206 149.3583

#If we move 20 km how many degrees?
UTM2deg(easting = 712748.88 + 20000, northing = 6021016.41, zone = 55, format_out = 'deg' )
# lat      lon
# 1 -35.92751 149.5798

# Different in lon
149.5798 - 149.3583 # 0.22
# Difference in lat
-35.93206 - -35.92751 #-0.00455
# So 20 km is about 0.22 deg, we can use this to create an approximate scale


# Get segment of map of Australia for area south of Canberra


aust_map <- ggmap(get_map(location = c(lon =149.3375 , lat = -35.8875), zoom = 10), extent = 'device')


aust_map

# Specification of label locations
my_nudge_x = c(0, 0.02, 0, -0.01, 0.01, -0.01, 0.01, -0.01, 0, 0.015, 0, 0.015, 0, 0.02, 0, -0.02)

my_nudge_y = c(0.02, 0, -0.02, 0, 0.01, 0, 0, 0, 0.01, 0, -0.01, 0, 0.01, 0, -0.01, -0.01 )


# Add points
# One point is in Queensland which won't be shown
p <- aust_map +
  geom_point(aes(x = lon, y = lat), colour = 'red', size = 0.4, data = storm_map_df) +
  geom_text(aes(x = lon, y = lat, label = my_rows), data = storm_map_df, size = 1.9, check_overlap = FALSE,
            nudge_x = my_nudge_x, nudge_y = my_nudge_y)  +
  theme_minimal(base_size=5) +
  labs(x = NULL, y = NULL) +
  geom_segment(x = 149.0, xend = 149.0, y = -35.8, yend = -35.6, colour = 'black', # North arrow
               arrow = arrow(length = unit(0.02, "npc"), type = 'closed')) +
  geom_segment(x = 149, xend = 149.22 , y =-36.1 , yend =-36.1 , colour = 'black', # Scale
               arrow = arrow(angle = 90, ends = 'both', length = unit(0.02, "npc"))) +
  annotate(geom = 'text', x = 149.11, y = -36.08, label = '20 km', size = 1.5)

p


# Save for webpage

ggsave(file.path('../figures', 'storm_loc_map.png'), p, width = 4, height = 4)


# Process patterns for graphing -------------------------------------------


# Function to create list of patterns
# Patterns need to be stored in a list rather than a data frame because the number of increments vary

List_patterns <- function(fname) {
  pat <- read_lines(file = file.path('../data', fname ), skip = 1, n_max = -1L)

  # Split the strings at commas
  pat <- str_split(pat, ',')

  # Get the region names and the event IDs so that we can name list
  # elements and then pull individual patterns out.
  # The region names are the 4th element and the pattern number are the first

  patIDs <- map_chr(.x = 1:length(pat), .f = function(i)(str_c (pat[[i]][4], pat[[i]][1], sep = '_')))
  patIDs <- str_to_lower(patIDs) # convert to lower case
  patIDs <- str_replace_all(patIDs, ' ', '_') # convert to snake case

  names(pat) <- patIDs

  return(pat)

}


# Apply to all increments files

all_increments <-  map(.x = increment_files, .f = List_patterns)
# This produces a list of 12 lists
# Use purrr::flatten to make into a single list

all_increments <- purrr::flatten(all_increments)
#head(all_increments)
#tail(all_increments)

# What are the region names
all_names <- names(all_increments)
unique(str_replace(all_names, '[0-9]+', ''))

# [1] "central_slopes_"             "east_coast_(north)_"         "east_coast_(south)_"         "s-sw_flatlands_(east)_"      "s-sw_flatlands_(west)_"
# [6] "murray_basin_"               "monsoonal_north_"            "rangelands_"                 "rangelands_(west)_"          "southern_slopes_(mainland)_"
# [11] "southern_slopes_(tasmania)_" "wet_tropics_"


# Function to graph a pattern given the region name and event id e.g. central_slopes_3565

Graph_pattern <- function(pat_name_id) {


  pat_data <- all_increments[[pat_name_id]]

  my_eventID <- pat_data[1]
  my_duration <- pat_data[2]
  my_increments <- pat_data[3]
  my_region <- pat_data[4]
  my_area <-  pat_data[5]
  my_breaks <- seq(0, 60, by = 5) # The maximum number of increments is 56
  # so setting this to 60 will always work


  my_title <- sprintf('Areal Temporal Pattern %s',my_eventID)

  my_subtitle <- sprintf('Region = %s, Area = %s km-squared, \nDuration = %s min (%.0f hour), %i increments of %s min',
                         my_region, my_area, my_duration, as.numeric(my_duration)/60, as.numeric(my_duration)/as.numeric(my_increments), my_increments)

  # remove header information just leaving numbers
  pat_data <- as.numeric(pat_data[-1:-5]  )

  # Create dataframe for plotting
  pat_data_df <- data_frame(increments = 1:length(pat_data), percentages = pat_data )


  p <- pat_data_df %>%
    ggplot(aes(x = increments, y = pat_data)) +
    geom_col(fill = 'blue', colour = 'black') +
    scale_y_continuous(name = 'Percentage in each increment') +
    scale_x_continuous(breaks = my_breaks) +
    labs(title = my_title,
         subtitle = my_subtitle) +
    theme_grey(base_size = 5)


  # five minors per major
  #https://stackoverflow.com/questions/39691591/ggplot2-integer-multiple-of-minor-breaks-per-major-break

  majors <- ggplot_build(p)$layout$panel_params[[1]]$x.major_source

  multiplier <- 5
  minors <- seq(from = min(majors),
                to = max(majors),
                length.out = ((length(majors) - 1) * multiplier) + 1)
  p <- p + scale_x_continuous(name = 'Increments', breaks = my_breaks, minor_breaks = minors)
  suppressMessages(print(p))
}


# Create graphs for figures 4 and 5 in the blog

p1 <- Graph_pattern('murray_basin_5901')
p2 <-Graph_pattern('murray_basin_5644')
p3 <-Graph_pattern('murray_basin_5731')
p4 <-Graph_pattern('murray_basin_5898')

p <- p1 + p2 + p3 + p4

p

ggsave(file.path('../figures', 'next4_patterns.png'), p, width = 4, height = 3)


p5 <- Graph_pattern('murray_basin_5810')
p6 <- Graph_pattern('central_slopes_4170')
p7 <- Graph_pattern('murray_basin_5822')
p8 <- Graph_pattern('monsoonal_north_5004')


p <- p5 + p6 + p7 + p8
p
ggsave(file.path('../figures', 'next5_8_patterns.png'), p, width = 4, height = 3)


	# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT


	# Packages ----------------------------------------------------------------

	devtools::install_github("thomasp85/patchwork")

	library(tidyverse)
	library(stringr)
	library(rlang)
	library(ggmap)
	library(patchwork)



	# Load data for Figure 2 in blog and plot ------------------------------------------


	list.files('../data')

	pat <- read_csv(file = file.path('../data', "Areal_pattern_7200min_200km2_SS.csv" ), skip = 1 )
	glimpse(pat)

	max(pat)
	#[1] 48.82

	ann_text <- data.frame(time = 30, percentage = 35, lab = "ID = 6884", hjust = 'left',
	pattern_no = factor('X7', levels = str_c('X', 1:10), ordered = TRUE ))

	p <- pat %>%
	mutate(time = 1:nrow(pat)) %>%
	gather(-time, key = 'pattern_no', value = 'percentage') %>%
	mutate(pattern_no = factor(pattern_no, levels = str_c('X', 1:10), ordered = TRUE )) %>%
	ggplot(aes(time, percentage)) +
	geom_col(fill = 'blue', colour = 'black') +
	facet_wrap(~pattern_no) +
	geom_text(data = ann_text,aes(label = lab), size = 2) +
	scale_y_continuous(name = 'Percentage in each increment') +
	scale_x_continuous(name = '') +
	labs(title = 'Areal Temporal Patterns',
	subtitle = 'Southern Slopes, duration = 7200 min (120 hours), area = 200 sq-km, \n40 increments of 180 min')+
	theme_gray(base_size = 7)

	p
	ggsave(file.path('../figures', 'southern_slopes_6884.png'), plot = p, width = 4, height = 3)


	# Download Areal pattern increment files and all states files -----------------------------------------------------

	# There are 12 regions
	# 1. Rangelands West
	# 2. Wet Tropics
	# 3. Rangelands
	# 4. Central Slopes
	# 5. Monsoonal North
	# 6. Murray Basin
	# 7. East Coast North
	# 8. East Coast South
	# 9. Southern Slopes Mainland
	# 10. Southern Slopes Tasmania
	# 11. East flatlands
	# 12. West flatlands

	# Two files for each region
	# An increments file - which has the temporal patterns
	# An Allstats file which as information on the storms that were used to generate the temporal patterns.


	# Get all the Areal Increment files
	increment_files <- list.files('../data')
	increment_files <- str_subset(increment_files, '(Areal).*(Increments)') # Extract files that contain 'Areal' and 'Increments'
	length(increment_files) # 12, as expected
	increment_files

	# [1] "Areal_CS_Increments.csv" "Areal_ECnorth_Increments.csv" "Areal_ECsouth_Increments.csv" "Areal_FLTeast_Increments.csv"
	# [5] "Areal_FLTwest_Increments.csv" "Areal_MB_Increments.csv" "Areal_MN_Increments.csv" "Areal_R_Increments.csv"
	# [9] "Areal_Rwest_Increments.csv" "Areal_SSmainland_Increments.csv" "Areal_SStas_Increments.csv" "Areal_WT_Increments.csv"


	# list the All Stats files

	str_subset(list.files('../data'), 'AllStats') # Extract files that contain 'AllStats'

	# [1] "Areal_CS_AllStats.csv" "Areal_ECnorth_AllStats.csv" "Areal_ECsouth_AllStats.csv"
	# [4] "Areal_FLTeast_AllStats.csv" "Areal_FLTwest_AllStats.csv" "Areal_MB_AllStats.csv"
	# [7] "Areal_MN_AllStats.csv" "Areal_R_AllStats.csv" "Areal_Rwest_AllStats.csv"
	# [10] "Areal_SSmainland_AllStats.csv" "Areal_SStas_AllStats.csv" "Areal_WT_AllStats.csv"

	# link file names to region names
	allstats_files <- set_names(c("Areal_CS_AllStats.csv",
	"Areal_ECnorth_AllStats.csv",
	"Areal_ECsouth_AllStats.csv",
	"Areal_FLTeast_AllStats.csv",
	"Areal_FLTwest_AllStats.csv",
	"Areal_MB_AllStats.csv",
	"Areal_MN_AllStats.csv",
	"Areal_R_AllStats.csv",
	"Areal_Rwest_AllStats.csv",
	"Areal_SSmainland_AllStats.csv",
	"Areal_SStas_AllStats.csv",
	"Areal_WT_AllStats.csv"),
	c('central_slopes',
	'east_coast_north',
	'east_coast_south',
	'east_flatlands',
	'west_flatlands',
	'murray_basin',
	'monsoonal_north',
	'rangelands',
	'rangelands_west',
	'southern_slopes_mainland',
	'southern_slopes_tas',
	'wet_tropics'))



	# Function to download an all stats file, convert to a dataframe and store the whole lot in a data frame

	Get_storm_stats <- function(fname) {

	# Read in all the patterns
	# One pattern per line

	#fname <- "Areal_CS_AllStats.csv"
	storm_stats <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)



	# Split the strings at commas
	storm_stats <- str_split(storm_stats, ',')


	# Convert to data frame
	storm_stats <- as_data_frame(rlang::exec('rbind', !!!storm_stats))

	# name the columns
	names(storm_stats) <- c('eventID','region', 'region_source',
	'duration', 'timestep', 'original_burst_depth_mm',
	'area','aspect_ratio', 'rotation',
	'burst_start_time', 'lat', 'lon')
	storm_stats


	}

	# Get stats for all events
	all_regions_stats <- map_df(.x = allstats_files, .f = Get_storm_stats)

	all_regions_stats

	# Function to calcuate the NUI

	Chisquare_p <- function(p) {
	p <- p[-(1:5)] # Remove the first 5 elements
	p <- as.numeric(p) # Convert to numeric
	p_e <- rep(100/length(p), times = length(p)) # Expected
	chi <- sum(((p - p_e)^2)/p_e) # o - e
	-log(-pchisq(chi, df = length(p) - 1, log.p = TRUE))
	# This transformation provides numbers that are in a range that are easy to work with
	}


	# Function to read in patterns, calculate NUI and return a data frame with information:
	# eventID, duration_min, duration_hr, interval_min, region, NUI

	# fname is the file name of the increments file

	Find_bad_patterns <- function(fname) {

	# Read in all the patterns
	# One pattern per line
	pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)

	# Split the strings at commas
	pat <- str_split(pat, ',')

	# Calculate the Chisquare statistic for each pattern
	chisq_stat <- unlist(map(.x = pat, .f = Chisquare_p))


	# Function to grab the first 5 fields from a temporal pattern
	# The first 5 entres are:
	# 1. EventID
	# 2. Duration (minutes)
	# 3. Interval length (min)
	# 4. Region
	# 5. Area

	First_five <- function(p) {
	p <- p[1:5]
	}


	names(pat) <- 1:length(pat) # pat needs to have names for map_df to work
	pat_db <- map_df(.x = pat, .f= First_five) # get first 5 fields
	pat_db <- as_data_frame(t(as.matrix(pat_db))) # need to transpose to convert to data frame
	names(pat_db) <- c('eventID', 'duration_min', 'interval_min', 'region', 'area')


	# Add the non_uniformity index value NUI

	pat_db <- pat_db %>%
	mutate(NUI = chisq_stat) %>%
	mutate(duration_min = as.integer(duration_min)) %>%
	mutate(row_n = 1:length(pat)) %>%
	mutate(duration_hr = as.integer(duration_min/60)) %>%
	select(row_n, eventID, duration_min, duration_hr, interval_min, region, NUI)


	# return the data for each region

	pat_db %>%
	arrange(desc(NUI))

	}


	# Process all increment files to creat a summary for all patterns
	all_patterns_sum <- map_df(.x = increment_files, .f = Find_bad_patterns )
	all_patterns_sum <- all_patterns_sum %>% arrange(desc(NUI))

	#Attach storm data to pattern summary


	all_patterns_sum <- all_patterns_sum %>%
	left_join(all_regions_stats, by = c("region" = "region", "eventID" = "eventID")) %>%
	arrange(desc(NUI))

	#all_patterns_sum %>% View
	all_patterns_sum %>% print(n = 21)

	# Map locations ----------------------------------------------------------


	# Plot a graph of locations
	#

	# First n locations

	my_rows = 1:16

	storm_map_df <- all_patterns_sum %>%
	slice(my_rows) %>%
	mutate_at(.vars = c('lat','lon'), .funs = as.numeric) %>%
	add_column(num = my_rows)



	# You need an API key for this to work see ?register_google

	# Calculating the scale

	# Source function that calculates distance (UTM) from degrees
	source("https://gist.githubusercontent.com/TonyLadson/f37aab3e2ef517188a7f27166307c985/raw/0822970769bc90fcc28052a91b375399d665286e/UTM2deg.R")

	# These eastings and northings are from the Jerangle area where the least uniform
	# patterns occur
	UTM2deg(easting = 712748.88, northing = 6021016.41, zone = 55, format_out = 'deg' )
	# lat lon
	# 1 -35.93206 149.3583

	#If we move 20 km how many degrees?
	UTM2deg(easting = 712748.88 + 20000, northing = 6021016.41, zone = 55, format_out = 'deg' )
	# lat lon
	# 1 -35.92751 149.5798

	# Different in lon
	149.5798 - 149.3583 # 0.22
	# Difference in lat
	-35.93206 - -35.92751 #-0.00455
	# So 20 km is about 0.22 deg, we can use this to create an approximate scale


	# Get segment of map of Australia for area south of Canberra


	aust_map <- ggmap(get_map(location = c(lon =149.3375 , lat = -35.8875), zoom = 10), extent = 'device')


	aust_map

	# Specification of label locations
	my_nudge_x = c(0, 0.02, 0, -0.01, 0.01, -0.01, 0.01, -0.01, 0, 0.015, 0, 0.015, 0, 0.02, 0, -0.02)

	my_nudge_y = c(0.02, 0, -0.02, 0, 0.01, 0, 0, 0, 0.01, 0, -0.01, 0, 0.01, 0, -0.01, -0.01 )


	# Add points
	# One point is in Queensland which won't be shown
	p <- aust_map +
	geom_point(aes(x = lon, y = lat), colour = 'red', size = 0.4, data = storm_map_df) +
	geom_text(aes(x = lon, y = lat, label = my_rows), data = storm_map_df, size = 1.9, check_overlap = FALSE,
	nudge_x = my_nudge_x, nudge_y = my_nudge_y) +
	theme_minimal(base_size=5) +
	labs(x = NULL, y = NULL) +
	geom_segment(x = 149.0, xend = 149.0, y = -35.8, yend = -35.6, colour = 'black', # North arrow
	arrow = arrow(length = unit(0.02, "npc"), type = 'closed')) +
	geom_segment(x = 149, xend = 149.22 , y =-36.1 , yend =-36.1 , colour = 'black', # Scale
	arrow = arrow(angle = 90, ends = 'both', length = unit(0.02, "npc"))) +
	annotate(geom = 'text', x = 149.11, y = -36.08, label = '20 km', size = 1.5)

	p


	# Save for webpage

	ggsave(file.path('../figures', 'storm_loc_map.png'), p, width = 4, height = 4)


	# Process patterns for graphing -------------------------------------------


	# Function to create list of patterns
	# Patterns need to be stored in a list rather than a data frame because the number of increments vary

	List_patterns <- function(fname) {
	pat <- read_lines(file = file.path('../data', fname ), skip = 1, n_max = -1L)

	# Split the strings at commas
	pat <- str_split(pat, ',')

	# Get the region names and the event IDs so that we can name list
	# elements and then pull individual patterns out.
	# The region names are the 4th element and the pattern number are the first

	patIDs <- map_chr(.x = 1:length(pat), .f = function(i)(str_c (pat[[i]][4], pat[[i]][1], sep = '_')))
	patIDs <- str_to_lower(patIDs) # convert to lower case
	patIDs <- str_replace_all(patIDs, ' ', '_') # convert to snake case

	names(pat) <- patIDs

	return(pat)

	}




	# Apply to all increments files

	all_increments <- map(.x = increment_files, .f = List_patterns)
	# This produces a list of 12 lists
	# Use purrr::flatten to make into a single list

	all_increments <- purrr::flatten(all_increments)
	#head(all_increments)
	#tail(all_increments)

	# What are the region names
	all_names <- names(all_increments)
	unique(str_replace(all_names, '[0-9]+', ''))

	# [1] "central_slopes_" "east_coast_(north)_" "east_coast_(south)_" "s-sw_flatlands_(east)_" "s-sw_flatlands_(west)_"
	# [6] "murray_basin_" "monsoonal_north_" "rangelands_" "rangelands_(west)_" "southern_slopes_(mainland)_"
	# [11] "southern_slopes_(tasmania)_" "wet_tropics_"




	# Function to graph a pattern given the region name and event id e.g. central_slopes_3565

	Graph_pattern <- function(pat_name_id) {


	pat_data <- all_increments[[pat_name_id]]

	my_eventID <- pat_data[1]
	my_duration <- pat_data[2]
	my_increments <- pat_data[3]
	my_region <- pat_data[4]
	my_area <- pat_data[5]
	my_breaks <- seq(0, 60, by = 5) # The maximum number of increments is 56
	# so setting this to 60 will always work


	my_title <- sprintf('Areal Temporal Pattern %s',my_eventID)

	my_subtitle <- sprintf('Region = %s, Area = %s km-squared, \nDuration = %s min (%.0f hour), %i increments of %s min',
	my_region, my_area, my_duration, as.numeric(my_duration)/60, as.numeric(my_duration)/as.numeric(my_increments), my_increments)

	# remove header information just leaving numbers
	pat_data <- as.numeric(pat_data[-1:-5] )

	# Create dataframe for plotting
	pat_data_df <- data_frame(increments = 1:length(pat_data), percentages = pat_data )


	p <- pat_data_df %>%
	ggplot(aes(x = increments, y = pat_data)) +
	geom_col(fill = 'blue', colour = 'black') +
	scale_y_continuous(name = 'Percentage in each increment') +
	scale_x_continuous(breaks = my_breaks) +
	labs(title = my_title,
	subtitle = my_subtitle) +
	theme_grey(base_size = 5)


	# five minors per major
	#https://stackoverflow.com/questions/39691591/ggplot2-integer-multiple-of-minor-breaks-per-major-break

	majors <- ggplot_build(p)$layout$panel_params[[1]]$x.major_source

	multiplier <- 5
	minors <- seq(from = min(majors),
	to = max(majors),
	length.out = ((length(majors) - 1) * multiplier) + 1)
	p <- p + scale_x_continuous(name = 'Increments', breaks = my_breaks, minor_breaks = minors)
	suppressMessages(print(p))
	}


	# Create graphs for figures 4 and 5 in the blog

	p1 <- Graph_pattern('murray_basin_5901')
	p2 <-Graph_pattern('murray_basin_5644')
	p3 <-Graph_pattern('murray_basin_5731')
	p4 <-Graph_pattern('murray_basin_5898')

	p <- p1 + p2 + p3 + p4

	p

	ggsave(file.path('../figures', 'next4_patterns.png'), p, width = 4, height = 3)



	p5 <- Graph_pattern('murray_basin_5810')
	p6 <- Graph_pattern('central_slopes_4170')
	p7 <- Graph_pattern('murray_basin_5822')
	p8 <- Graph_pattern('monsoonal_north_5004')



	p <- p5 + p6 + p7 + p8
	p
	ggsave(file.path('../figures', 'next5_8_patterns.png'), p, width = 4, height = 3)