TonyLadson/Point_patterns.R

## Point_patterns.R


# I previously looked at areal patterns https://tonyladson.wordpress.com/2019/04/02/check-arr-temporal-patterns-before-you-use-them/
# This blog extends the analysis to point patterns


# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT


# Packages ----------------------------------------------------------------

devtools::install_github("thomasp85/patchwork")

library(tidyverse)
library(stringr)
library(rlang)
library(ggmap)
library(patchwork)

#help(package = 'stringr')


# Load data  ------------------------------------------


# Download point pattern increment files and all states files -----------------------------------------------------

# There are 12 regions
# 1.  Rangelands West
# 2. Wet Tropics
# 3. Rangelands
# 4. Central Slopes
# 5. Monsoonal North
# 6. Murray Basin
# 7. East Coast North
# 8. East Coast South
# 9. Southern Slopes Mainland
# 10. Southern Slopes Tasmania
# 11. East flatlands
# 12. West flatlands

# Two files for each region
# An increments file - which has the temporal patterns
# An Allstats file which as information on the storms that were used to generate the temporal patterns.


# Get all the point Increment files

# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT

list.files('../data')


all_increment_files <- str_subset(list.files('../data'), '(Increments)') # Extract files that contain  'Increments'

point_increment_files <- all_increment_files[!str_detect(all_increment_files, 'Areal')]  # Remove those that contain 'Areal'

length(point_increment_files) # 12, as expected

point_increment_files

# [1] "CS_Increments.csv"         "ECnorth_Increments.csv"    "ECsouth_Increments.csv"    "FLTeast_Increments .csv"   "FLTwest_Increments.csv"
# [6] "MB_Increments.csv"         "MN_Increments.csv"         "R_Increments.csv"          "Rwest_Increments.csv"      "SSmainland_Increments.csv"
# [11] "SStas_Increments.csv"      "WT_Increments.csv"

# list the All Stats files

all_stats_files <- str_subset(list.files('../data'), 'AllStats') # Extract files that contain 'AllStats'
point_stats_files <- all_stats_files[!str_detect(all_stats_files, 'Areal')]

point_stats_files

# [1] "CS_AllStats.csv"         "ECnorth_AllStats.csv"    "ECsouth_AllStats.csv"    "FLTeast_AllStats .csv"   "FLTwest_AllStats.csv"    "MB_AllStats.csv"
# [7] "MN_AllStats.csv"         "R_AllStats.csv"          "Rwest_AllStats .csv"     "SSmainland_AllStats.csv" "SStas_AllStats.csv"      "WT_AllStats.csv"


# Function to download an all stats file, convert to a dataframe and store the whole lot in a data frame

Get_burst_stats <- function(fname) {

  # Read in all the patterns
  # One pattern per line


  #fname <- "CS_AllStats.csv"
  burst_stats <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)

  # Split the strings at commas
  burst_stats <- str_split(burst_stats, ',')


  # Convert to data frame
  burst_stats <- as_data_frame(rlang::exec('rbind', !!!burst_stats))

    # name the columns
  names(burst_stats) <- c('eventID','region', 'region_source',
                          'burst_duration','burst_loading', 'original_burst_depth',
                          'aep_window', 'aep_source', 'burst_start', 'burst_end',
                          'event_ref_number', 'pluvio_ref', 'offical_gauge',
                          'lat', 'lon')

  burst_stats

}


# Try on one file
Get_burst_stats("MN_AllStats.csv")

# Get stats for all regions
all_regions_stats <- map_df(.x = point_stats_files, .f = Get_burst_stats)

all_regions_stats

# Function to calcuate the NUI - non-uniformity index for a rainfall pattern

Chisquare_p <- function(p) {
  p <- p[-(1:5)] # Remove the first 5 elements
  p <- as.numeric(p) # Convert to numeric
  p_e <- rep(100/length(p), times = length(p)) # Expected
  chi <- sum(((p - p_e)^2)/p_e) # o - e
  -log(-pchisq(chi, df = length(p) - 1, log.p = TRUE))
  # chisquare p value setting log = TRUE because some values are very small
  # Then take the log of the this value to make it a reasonable size
}


# Function to read in patterns, calculate NUI and return a data frame with information:
# eventID, duration_min, duration_hr, interval_min, region, NUI

# fname is the file name of the increments file

Calc_NUI_for_patterns <- function(fname) {


  #fname <- "CS_Increments.csv"

  # Read in all the patterns
  # One pattern per line
  pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)

  #pat


  # remove trialing commas

  pat <- str_replace(pat, '[,]*$', '')

  #pat

  # Split the strings at commas
  pat <- str_split(pat, ',')

  #pat

  # Calculate the Chisquare statistic for each pattern
  chisq_stat <- unlist(map(.x = pat, .f = Chisquare_p))

  #barplot(sort(chisq_stat))


  # Function to grab the first 5 fields from a temporal pattern
  # The first 5 entres are:
  # 1. EventID
  # 2. Duration (minutes)
  # 3. Time step
  # 4. Region
  # 5. AEP

  First_five <- function(p) {
    p <- p[1:5]
  }


  names(pat) <- 1:length(pat) # pat needs to have names for map_df to work
  pat_db <- map_df(.x = pat, .f= First_five) # get first 5 fields
  pat_db <- as_data_frame(t(as.matrix(pat_db))) # need to transpose to convert to data frame
  names(pat_db) <- c('eventID', 'duration_min', 'time_step', 'region', 'aep')


  # Add the non_uniformity index value NUI

  pat_db <- pat_db %>%
    mutate(NUI = chisq_stat) %>%
    mutate(duration_min = as.integer(duration_min)) %>%
    mutate(row_n = 1:length(pat)) %>%
    mutate(duration_hr = as.integer(duration_min/60)) %>%
    select(row_n, eventID, duration_min, duration_hr, time_step, region, aep, NUI)


  # return the data for each region

  pat_db %>%
    arrange(desc(NUI))


}


# Try on one file

Calc_NUI_for_patterns("CS_Increments.csv")

# Process all increment files to creat a summary for all patterns


all_point_patterns_sum <- map_df(.x = point_increment_files, .f = Calc_NUI_for_patterns)
all_point_patterns_sum <- all_point_patterns_sum %>%  arrange(desc(NUI))

all_point_patterns_sum

#Attach storm data to pattern summary


all_point_patterns_sum <- all_point_patterns_sum %>%
  left_join(all_regions_stats, by = c("region" = "region", "eventID" = "eventID")) %>%
  arrange(desc(NUI))

all_point_patterns_sum %>% View
all_point_patterns_sum %>%
  select(2:9) %>% View

# row_n eventID duration_min duration_hr time_step region                aep            NUI region_source
# <int> <chr>          <int>       <int> <chr>     <chr>                 <chr>        <dbl> <chr>
# 1   665 1208            8640         144 180       Rangelands (West)     frequent      419. Rangelands (West)
# 2   706 1182           10080         168 180       Rangelands (West)     intermediate  416. Rangelands (West)
# 3   697 1973           10080         168 180       Rangelands            frequent      375. Rangelands
# 4   676 1183            8640         144 180       Rangelands            intermediate  357. Rangelands
# 5   676 1181            8640         144 180       Rangelands (West)     intermediate  357. Rangelands (West)
# 6   695 2870           10080         168 180       Central Slopes        frequent      342. Central Slopes
# 7   703 1083           10080         168 180       Rangelands            intermediate  341. Rangelands
# 8   705 1079           10080         168 180       Rangelands (West)     intermediate  341. Rangelands (West)
# 9   696 1971           10080         168 180       Rangelands            frequent      338. Rangelands
# 10   673 1082            8640         144 180       Rangelands            intermediate  326. Rangelands
# 11   673 1078            8640         144 180       Rangelands (West)     intermediate  326. Rangelands (West)
# 12   691 7838           10080         168 180       S-SW Flatlands (East) frequent      305. S-SW Flatlands (East)
# 13   662 7837            8640         144 180       S-SW Flatlands (East) frequent      303. S-SW Flatlands (East)
# 14   700 1979           10080         168 180       Rangelands            frequent      284. Rangelands
# 15   633 7836            7200         120 180       S-SW Flatlands (East) frequent      283. S-SW Flatlands (East)
# 16   644 1081            7200         120 180       Rangelands            intermediate  283. Rangelands
# 17   644 1077            7200         120 180       Rangelands (West)     intermediate  283. Rangelands (West)
# 18   699 5767           10080         168 180       East Coast (North)    frequent      279. East Coast (North)
# 19   698 1264           10080         168 180       Rangelands (West)     frequent      278. Rangelands (West)
# 20   699 2874           10080         168 180       Central Slopes        frequent      274. Central Slopes
# 21   694 1263           10080         168 180       Rangelands            frequent      272. Rangelands


#saveRDS(all_point_patterns_sum, file = file.path('../data', 'all_point_patterns_sum.rds'))


# Process patterns for graphing -------------------------------------------


# Function to create list of patterns
# Patterns need to be stored in a list rather than a data frame because the number of increments vary

List_patterns <- function(fname) {
  pat <- read_lines(file = file.path('../data', fname ), skip = 1, n_max = -1L)


  # strip off training commas

  pat <- str_replace(pat, '[,]*$', '')

  # Split the strings at commas
  pat <- str_split(pat, ',')

  # Get the region names and the event IDs so that we can name list
  # elements and then pull individual patterns out.
  # The region names are the 4th element and the EventID number is the first

  patIDs <- map_chr(.x = 1:length(pat), .f = function(i)(str_c (pat[[i]][4], pat[[i]][1], sep = '_')))
  patIDs <- str_to_lower(patIDs) # convert to lower case
  patIDs <- str_replace_all(patIDs, ' ', '_') # convert to snake case

  names(pat) <- patIDs

  return(pat)

}


# Apply to all increments files

all_point_increments <-  map(.x = point_increment_files, .f = List_patterns)
all_point_increments


# This produces a list of 12 lists
# Use purrr::flatten to make into a single list

all_point_increments <- purrr::flatten(all_point_increments)
#head(all_increments)
#tail(all_increments)

# What are the region names
all_names <- names(all_point_increments)
unique(str_replace(all_names, '[0-9]+', '')) # remove the trailling numbers so we are just left with the names,

# [1] "central_slopes_"             "east_coast_(north)_"         "east_coast_(south)_"         "s-sw_flatlands_(east)_"      "s-sw_flatlands_(west)_"
# [6] "murray_basin_"               "monsoonal_north_"            "rangelands_"                 "rangelands_(west)_"          "southern_slopes_(mainland)_"
# [11] "southern_slopes_(tasmania)_" "wet_tropics_"

# Save all_point_increments file for use in other scripts
#saveRDS(all_point_increments, file = file.path('../data','all_point_increments.rds'))

# Function to graph a pattern given the region name and event id e.g. central_slopes_3565

Graph_pattern <- function(pat_name_id) {

  pattern_ids = names(all_point_increments)

  # if only a pattern number is provided
  # change to region_number format, i.e. find the pattern in the names of the all_increments list

  if(is.numeric(pat_name_id)) {
    pat_name_id = pattern_ids[(str_detect(pattern_ids, as.character(pat_name_id)))]
  }


  pat_data <- all_point_increments[[pat_name_id]]

  my_eventID <- pat_data[1]
  my_duration <- pat_data[2]
  my_increments <- pat_data[3]
  my_region <- pat_data[4]
  my_aep <-  pat_data[5]
  my_breaks <- seq(0, 60, by = 5) # The maximum number of increments is 56
  # so setting this to 60 will always work


  my_title <- sprintf('Areal Temporal Pattern %s',my_eventID)

  my_subtitle <- sprintf('Region = %s, AEP = %s, \nDuration = %s min (%.0f hour), %i increments of %s min',
                         my_region, my_aep, my_duration, as.numeric(my_duration)/60, as.numeric(my_duration)/as.numeric(my_increments), my_increments)

  # remove header information just leaving numbers
  pat_data <- as.numeric(pat_data[-1:-5]  )

  # Create dataframe for plotting
  pat_data_df <- data_frame(increments = 1:length(pat_data), percentages = pat_data )


  p <- pat_data_df %>%
    ggplot(aes(x = increments, y = pat_data)) +
    geom_col(fill = 'blue', colour = 'black') +
    scale_y_continuous(name = 'Percentage in each increment') +
    scale_x_continuous(breaks = my_breaks) +
    labs(title = my_title,
         subtitle = my_subtitle) +
    theme_grey(base_size = 5)


  # five minors per major
  #https://stackoverflow.com/questions/39691591/ggplot2-integer-multiple-of-minor-breaks-per-major-break

  majors <- ggplot_build(p)$layout$panel_params[[1]]$x.major_source

  multiplier <- 5
  minors <- seq(from = min(majors),
                to = max(majors),
                length.out = ((length(majors) - 1) * multiplier) + 1)
  p <- p + scale_x_continuous(name = 'Increments', breaks = my_breaks, minor_breaks = minors)
  suppressMessages(print(p))
}


# Create graphs


p1 <- Graph_pattern(1208)
p2 <-Graph_pattern(1182)
p3 <-Graph_pattern(1973)
p4 <-Graph_pattern(1183)

p <- p1 + p2 + p3 + p4

ggsave(file.path('../figures', 'point_patterns_1-4.png'), p, width = 4, height = 3)


p5 <- Graph_pattern(1181)
p6 <-Graph_pattern(2870)
p7 <-Graph_pattern(1083)
p8 <-Graph_pattern(1079)

p <- p5 + p6 + p7 + p8
p
ggsave(file.path('../figures', 'point_patterns_2-8.png'), p, width = 4, height = 3)


# Peaks 24 hours apart ----------------------------------------------------

peak_diff_pattern <- function(pat_name_id) {


  pat_data <- all_point_increments[[pat_name_id]]

  my_eventID <- pat_data[1]
  my_duration <- pat_data[2]
  my_increments <- pat_data[3]
  my_region <- pat_data[4]
  my_area <-  pat_data[5]


  pat_data <- as.numeric(pat_data[-1:-5]  )
  stopifnot(sum(pat_data) == 100)

  pat_data_df <- data_frame(inc_num = 1:length(pat_data),
                            tt = as.numeric(my_increments) * inc_num,
                            pc = as.numeric(pat_data))


  pat_data_df %>%
    arrange(desc(pc)) %>%
    summarise(eventID = my_eventID,
              peak_1 = pc[1],
              peak_2 = pc[2],
              diff_time = abs(tt[1] - tt[2]),
              diff_time_hr = diff_time/60)


}


# process all patterns
diffs <- map_df(.x = 1:length(all_point_increments), .f = peak_diff_pattern)

diffs


# Search for patterns were the peaks are large and 24 hours apart

diffs %>%
  filter(diff_time_hr == 24) %>%
  filter(peak_1 >15 & peak_2 >15) %>%
  arrange(desc(peak_1), desc(peak_2)) %>% View


diffs %>%
  filter(diff_time_hr == 24) %>%
  filter(peak_1 >15 & peak_2 >15) %>%
  arrange(desc(peak_1), desc(peak_2))


# # A tibble: 14 x 5
# eventID peak_1 peak_2 diff_time diff_time_hr
# <chr>    <dbl>  <dbl>     <dbl>        <dbl>
#   1 5636      31.0   30.6      1440           24
# 2 5679      29.6   15.2      1440           24
# 3 5635      27.1   22.7      1440           24
# 4 7695      26.5   15.6      1440           24
# 5 3561      26.1   19.3      1440           24
# 6 9006      25.3   15.2      1440           24
# 7 7696      23.4   15.0      1440           24
# 8 6343      21.6   16.5      1440           24
# 9 8386      19.5   15.4      1440           24
# 10 6376      19     17.0      1440           24
# 11 892       18.2   17.8      1440           24
# 12 4142      17.6   15.9      1440           24
# 13 7729      17.5   16.5      1440           24
# 14 8368      17.0   16.0      1440           24


# Check the statistics for these events

ids <- diffs %>%
  filter(diff_time_hr == 24) %>%
  filter(peak_1 >15 & peak_2 >15) %>%
  arrange(desc(peak_1), desc(peak_2)) %>%
  slice(1:10) %>%
  select(eventID) %>% pull


all_point_patterns_sum %>%
  filter(eventID %in% (ids)) %>% View


p1 <- Graph_pattern(5636)
p2 <-Graph_pattern(5679)
p3 <-Graph_pattern(5635)
p4 <-Graph_pattern(7695)

p <- p1 + p2 + p3 + p4


	# I previously looked at areal patterns https://tonyladson.wordpress.com/2019/04/02/check-arr-temporal-patterns-before-you-use-them/
	# This blog extends the analysis to point patterns


	# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT


	# Packages ----------------------------------------------------------------

	devtools::install_github("thomasp85/patchwork")

	library(tidyverse)
	library(stringr)
	library(rlang)
	library(ggmap)
	library(patchwork)

	#help(package = 'stringr')


	# Load data ------------------------------------------



	# Download point pattern increment files and all states files -----------------------------------------------------

	# There are 12 regions
	# 1. Rangelands West
	# 2. Wet Tropics
	# 3. Rangelands
	# 4. Central Slopes
	# 5. Monsoonal North
	# 6. Murray Basin
	# 7. East Coast North
	# 8. East Coast South
	# 9. Southern Slopes Mainland
	# 10. Southern Slopes Tasmania
	# 11. East flatlands
	# 12. West flatlands

	# Two files for each region
	# An increments file - which has the temporal patterns
	# An Allstats file which as information on the storms that were used to generate the temporal patterns.


	# Get all the point Increment files

	# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT

	list.files('../data')


	all_increment_files <- str_subset(list.files('../data'), '(Increments)') # Extract files that contain 'Increments'

	point_increment_files <- all_increment_files[!str_detect(all_increment_files, 'Areal')] # Remove those that contain 'Areal'

	length(point_increment_files) # 12, as expected

	point_increment_files

	# [1] "CS_Increments.csv" "ECnorth_Increments.csv" "ECsouth_Increments.csv" "FLTeast_Increments .csv" "FLTwest_Increments.csv"
	# [6] "MB_Increments.csv" "MN_Increments.csv" "R_Increments.csv" "Rwest_Increments.csv" "SSmainland_Increments.csv"
	# [11] "SStas_Increments.csv" "WT_Increments.csv"

	# list the All Stats files

	all_stats_files <- str_subset(list.files('../data'), 'AllStats') # Extract files that contain 'AllStats'
	point_stats_files <- all_stats_files[!str_detect(all_stats_files, 'Areal')]

	point_stats_files

	# [1] "CS_AllStats.csv" "ECnorth_AllStats.csv" "ECsouth_AllStats.csv" "FLTeast_AllStats .csv" "FLTwest_AllStats.csv" "MB_AllStats.csv"
	# [7] "MN_AllStats.csv" "R_AllStats.csv" "Rwest_AllStats .csv" "SSmainland_AllStats.csv" "SStas_AllStats.csv" "WT_AllStats.csv"


	# Function to download an all stats file, convert to a dataframe and store the whole lot in a data frame

	Get_burst_stats <- function(fname) {

	# Read in all the patterns
	# One pattern per line


	#fname <- "CS_AllStats.csv"
	burst_stats <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)

	# Split the strings at commas
	burst_stats <- str_split(burst_stats, ',')


	# Convert to data frame
	burst_stats <- as_data_frame(rlang::exec('rbind', !!!burst_stats))

	# name the columns
	names(burst_stats) <- c('eventID','region', 'region_source',
	'burst_duration','burst_loading', 'original_burst_depth',
	'aep_window', 'aep_source', 'burst_start', 'burst_end',
	'event_ref_number', 'pluvio_ref', 'offical_gauge',
	'lat', 'lon')

	burst_stats

	}


	# Try on one file
	Get_burst_stats("MN_AllStats.csv")

	# Get stats for all regions
	all_regions_stats <- map_df(.x = point_stats_files, .f = Get_burst_stats)

	all_regions_stats

	# Function to calcuate the NUI - non-uniformity index for a rainfall pattern

	Chisquare_p <- function(p) {
	p <- p[-(1:5)] # Remove the first 5 elements
	p <- as.numeric(p) # Convert to numeric
	p_e <- rep(100/length(p), times = length(p)) # Expected
	chi <- sum(((p - p_e)^2)/p_e) # o - e
	-log(-pchisq(chi, df = length(p) - 1, log.p = TRUE))
	# chisquare p value setting log = TRUE because some values are very small
	# Then take the log of the this value to make it a reasonable size
	}


	# Function to read in patterns, calculate NUI and return a data frame with information:
	# eventID, duration_min, duration_hr, interval_min, region, NUI

	# fname is the file name of the increments file

	Calc_NUI_for_patterns <- function(fname) {


	#fname <- "CS_Increments.csv"

	# Read in all the patterns
	# One pattern per line
	pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)

	#pat


	# remove trialing commas

	pat <- str_replace(pat, '[,]*$', '')

	#pat

	# Split the strings at commas
	pat <- str_split(pat, ',')

	#pat

	# Calculate the Chisquare statistic for each pattern
	chisq_stat <- unlist(map(.x = pat, .f = Chisquare_p))

	#barplot(sort(chisq_stat))



	# Function to grab the first 5 fields from a temporal pattern
	# The first 5 entres are:
	# 1. EventID
	# 2. Duration (minutes)
	# 3. Time step
	# 4. Region
	# 5. AEP

	First_five <- function(p) {
	p <- p[1:5]
	}


	names(pat) <- 1:length(pat) # pat needs to have names for map_df to work
	pat_db <- map_df(.x = pat, .f= First_five) # get first 5 fields
	pat_db <- as_data_frame(t(as.matrix(pat_db))) # need to transpose to convert to data frame
	names(pat_db) <- c('eventID', 'duration_min', 'time_step', 'region', 'aep')


	# Add the non_uniformity index value NUI

	pat_db <- pat_db %>%
	mutate(NUI = chisq_stat) %>%
	mutate(duration_min = as.integer(duration_min)) %>%
	mutate(row_n = 1:length(pat)) %>%
	mutate(duration_hr = as.integer(duration_min/60)) %>%
	select(row_n, eventID, duration_min, duration_hr, time_step, region, aep, NUI)


	# return the data for each region

	pat_db %>%
	arrange(desc(NUI))



	}


	# Try on one file

	Calc_NUI_for_patterns("CS_Increments.csv")

	# Process all increment files to creat a summary for all patterns


	all_point_patterns_sum <- map_df(.x = point_increment_files, .f = Calc_NUI_for_patterns)
	all_point_patterns_sum <- all_point_patterns_sum %>% arrange(desc(NUI))

	all_point_patterns_sum

	#Attach storm data to pattern summary


	all_point_patterns_sum <- all_point_patterns_sum %>%
	left_join(all_regions_stats, by = c("region" = "region", "eventID" = "eventID")) %>%
	arrange(desc(NUI))

	all_point_patterns_sum %>% View
	all_point_patterns_sum %>%
	select(2:9) %>% View

	# row_n eventID duration_min duration_hr time_step region aep NUI region_source
	# <int> <chr> <int> <int> <chr> <chr> <chr> <dbl> <chr>
	# 1 665 1208 8640 144 180 Rangelands (West) frequent 419. Rangelands (West)
	# 2 706 1182 10080 168 180 Rangelands (West) intermediate 416. Rangelands (West)
	# 3 697 1973 10080 168 180 Rangelands frequent 375. Rangelands
	# 4 676 1183 8640 144 180 Rangelands intermediate 357. Rangelands
	# 5 676 1181 8640 144 180 Rangelands (West) intermediate 357. Rangelands (West)
	# 6 695 2870 10080 168 180 Central Slopes frequent 342. Central Slopes
	# 7 703 1083 10080 168 180 Rangelands intermediate 341. Rangelands
	# 8 705 1079 10080 168 180 Rangelands (West) intermediate 341. Rangelands (West)
	# 9 696 1971 10080 168 180 Rangelands frequent 338. Rangelands
	# 10 673 1082 8640 144 180 Rangelands intermediate 326. Rangelands
	# 11 673 1078 8640 144 180 Rangelands (West) intermediate 326. Rangelands (West)
	# 12 691 7838 10080 168 180 S-SW Flatlands (East) frequent 305. S-SW Flatlands (East)
	# 13 662 7837 8640 144 180 S-SW Flatlands (East) frequent 303. S-SW Flatlands (East)
	# 14 700 1979 10080 168 180 Rangelands frequent 284. Rangelands
	# 15 633 7836 7200 120 180 S-SW Flatlands (East) frequent 283. S-SW Flatlands (East)
	# 16 644 1081 7200 120 180 Rangelands intermediate 283. Rangelands
	# 17 644 1077 7200 120 180 Rangelands (West) intermediate 283. Rangelands (West)
	# 18 699 5767 10080 168 180 East Coast (North) frequent 279. East Coast (North)
	# 19 698 1264 10080 168 180 Rangelands (West) frequent 278. Rangelands (West)
	# 20 699 2874 10080 168 180 Central Slopes frequent 274. Central Slopes
	# 21 694 1263 10080 168 180 Rangelands frequent 272. Rangelands


	#saveRDS(all_point_patterns_sum, file = file.path('../data', 'all_point_patterns_sum.rds'))



	# Process patterns for graphing -------------------------------------------


	# Function to create list of patterns
	# Patterns need to be stored in a list rather than a data frame because the number of increments vary

	List_patterns <- function(fname) {
	pat <- read_lines(file = file.path('../data', fname ), skip = 1, n_max = -1L)


	# strip off training commas

	pat <- str_replace(pat, '[,]*$', '')

	# Split the strings at commas
	pat <- str_split(pat, ',')

	# Get the region names and the event IDs so that we can name list
	# elements and then pull individual patterns out.
	# The region names are the 4th element and the EventID number is the first

	patIDs <- map_chr(.x = 1:length(pat), .f = function(i)(str_c (pat[[i]][4], pat[[i]][1], sep = '_')))
	patIDs <- str_to_lower(patIDs) # convert to lower case
	patIDs <- str_replace_all(patIDs, ' ', '_') # convert to snake case

	names(pat) <- patIDs

	return(pat)

	}




	# Apply to all increments files

	all_point_increments <- map(.x = point_increment_files, .f = List_patterns)
	all_point_increments


	# This produces a list of 12 lists
	# Use purrr::flatten to make into a single list

	all_point_increments <- purrr::flatten(all_point_increments)
	#head(all_increments)
	#tail(all_increments)

	# What are the region names
	all_names <- names(all_point_increments)
	unique(str_replace(all_names, '[0-9]+', '')) # remove the trailling numbers so we are just left with the names,

	# [1] "central_slopes_" "east_coast_(north)_" "east_coast_(south)_" "s-sw_flatlands_(east)_" "s-sw_flatlands_(west)_"
	# [6] "murray_basin_" "monsoonal_north_" "rangelands_" "rangelands_(west)_" "southern_slopes_(mainland)_"
	# [11] "southern_slopes_(tasmania)_" "wet_tropics_"

	# Save all_point_increments file for use in other scripts
	#saveRDS(all_point_increments, file = file.path('../data','all_point_increments.rds'))

	# Function to graph a pattern given the region name and event id e.g. central_slopes_3565

	Graph_pattern <- function(pat_name_id) {

	pattern_ids = names(all_point_increments)

	# if only a pattern number is provided
	# change to region_number format, i.e. find the pattern in the names of the all_increments list

	if(is.numeric(pat_name_id)) {
	pat_name_id = pattern_ids[(str_detect(pattern_ids, as.character(pat_name_id)))]
	}



	pat_data <- all_point_increments[[pat_name_id]]

	my_eventID <- pat_data[1]
	my_duration <- pat_data[2]
	my_increments <- pat_data[3]
	my_region <- pat_data[4]
	my_aep <- pat_data[5]
	my_breaks <- seq(0, 60, by = 5) # The maximum number of increments is 56
	# so setting this to 60 will always work


	my_title <- sprintf('Areal Temporal Pattern %s',my_eventID)

	my_subtitle <- sprintf('Region = %s, AEP = %s, \nDuration = %s min (%.0f hour), %i increments of %s min',
	my_region, my_aep, my_duration, as.numeric(my_duration)/60, as.numeric(my_duration)/as.numeric(my_increments), my_increments)

	# remove header information just leaving numbers
	pat_data <- as.numeric(pat_data[-1:-5] )

	# Create dataframe for plotting
	pat_data_df <- data_frame(increments = 1:length(pat_data), percentages = pat_data )


	p <- pat_data_df %>%
	ggplot(aes(x = increments, y = pat_data)) +
	geom_col(fill = 'blue', colour = 'black') +
	scale_y_continuous(name = 'Percentage in each increment') +
	scale_x_continuous(breaks = my_breaks) +
	labs(title = my_title,
	subtitle = my_subtitle) +
	theme_grey(base_size = 5)


	# five minors per major
	#https://stackoverflow.com/questions/39691591/ggplot2-integer-multiple-of-minor-breaks-per-major-break

	majors <- ggplot_build(p)$layout$panel_params[[1]]$x.major_source

	multiplier <- 5
	minors <- seq(from = min(majors),
	to = max(majors),
	length.out = ((length(majors) - 1) * multiplier) + 1)
	p <- p + scale_x_continuous(name = 'Increments', breaks = my_breaks, minor_breaks = minors)
	suppressMessages(print(p))
	}




	# Create graphs




	p1 <- Graph_pattern(1208)
	p2 <-Graph_pattern(1182)
	p3 <-Graph_pattern(1973)
	p4 <-Graph_pattern(1183)

	p <- p1 + p2 + p3 + p4

	ggsave(file.path('../figures', 'point_patterns_1-4.png'), p, width = 4, height = 3)


	p5 <- Graph_pattern(1181)
	p6 <-Graph_pattern(2870)
	p7 <-Graph_pattern(1083)
	p8 <-Graph_pattern(1079)

	p <- p5 + p6 + p7 + p8
	p
	ggsave(file.path('../figures', 'point_patterns_2-8.png'), p, width = 4, height = 3)



	# Peaks 24 hours apart ----------------------------------------------------

	peak_diff_pattern <- function(pat_name_id) {


	pat_data <- all_point_increments[[pat_name_id]]

	my_eventID <- pat_data[1]
	my_duration <- pat_data[2]
	my_increments <- pat_data[3]
	my_region <- pat_data[4]
	my_area <- pat_data[5]


	pat_data <- as.numeric(pat_data[-1:-5] )
	stopifnot(sum(pat_data) == 100)

	pat_data_df <- data_frame(inc_num = 1:length(pat_data),
	tt = as.numeric(my_increments) * inc_num,
	pc = as.numeric(pat_data))


	pat_data_df %>%
	arrange(desc(pc)) %>%
	summarise(eventID = my_eventID,
	peak_1 = pc[1],
	peak_2 = pc[2],
	diff_time = abs(tt[1] - tt[2]),
	diff_time_hr = diff_time/60)


	}


	# process all patterns
	diffs <- map_df(.x = 1:length(all_point_increments), .f = peak_diff_pattern)

	diffs


	# Search for patterns were the peaks are large and 24 hours apart

	diffs %>%
	filter(diff_time_hr == 24) %>%
	filter(peak_1 >15 & peak_2 >15) %>%
	arrange(desc(peak_1), desc(peak_2)) %>% View


	diffs %>%
	filter(diff_time_hr == 24) %>%
	filter(peak_1 >15 & peak_2 >15) %>%
	arrange(desc(peak_1), desc(peak_2))



	# # A tibble: 14 x 5
	# eventID peak_1 peak_2 diff_time diff_time_hr
	# <chr> <dbl> <dbl> <dbl> <dbl>
	# 1 5636 31.0 30.6 1440 24
	# 2 5679 29.6 15.2 1440 24
	# 3 5635 27.1 22.7 1440 24
	# 4 7695 26.5 15.6 1440 24
	# 5 3561 26.1 19.3 1440 24
	# 6 9006 25.3 15.2 1440 24
	# 7 7696 23.4 15.0 1440 24
	# 8 6343 21.6 16.5 1440 24
	# 9 8386 19.5 15.4 1440 24
	# 10 6376 19 17.0 1440 24
	# 11 892 18.2 17.8 1440 24
	# 12 4142 17.6 15.9 1440 24
	# 13 7729 17.5 16.5 1440 24
	# 14 8368 17.0 16.0 1440 24



	# Check the statistics for these events

	ids <- diffs %>%
	filter(diff_time_hr == 24) %>%
	filter(peak_1 >15 & peak_2 >15) %>%
	arrange(desc(peak_1), desc(peak_2)) %>%
	slice(1:10) %>%
	select(eventID) %>% pull



	all_point_patterns_sum %>%
	filter(eventID %in% (ids)) %>% View




	p1 <- Graph_pattern(5636)
	p2 <-Graph_pattern(5679)
	p3 <-Graph_pattern(5635)
	p4 <-Graph_pattern(7695)

	p <- p1 + p2 + p3 + p4