# Analysis of point rainfall temporal patterns from Australian Rainfall and Runoff
# https://tonyladson.wordpress.com/2019/04/16/arr-point-temporal-patterns/
# I previously looked at areal patterns https://tonyladson.wordpress.com/2019/04/02/check-arr-temporal-patterns-before-you-use-them/
# This blog extends the analysis to point patterns
# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT
# Packages ----------------------------------------------------------------
# Run once to install the patchwork package
devtools::install_github("thomasp85/patchwork")
library(tidyverse)
library(stringr)
library(rlang)
library(ggmap)
library(patchwork)
#help(package = 'stringr')
# Load data ------------------------------------------
# Download point pattern increment files and all states files -----------------------------------------------------
# There are 12 regions
# 1. Rangelands West
# 2. Wet Tropics
# 3. Rangelands
# 4. Central Slopes
# 5. Monsoonal North
# 6. Murray Basin
# 7. East Coast North
# 8. East Coast South
# 9. Southern Slopes Mainland
# 10. Southern Slopes Tasmania
# 11. East flatlands
# 12. West flatlands
# Two files for each region
# An Increments file - which has the temporal patterns
# An AllStats file - which has information on the storms that were used to generate the temporal patterns
# Get all the point Increment files
# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT
list.files('../data')
all_increment_files <- str_subset(list.files('../data'), '(Increments)') # Extract files that contain 'Increments'
point_increment_files <- all_increment_files[!str_detect(all_increment_files, 'Areal')] # Remove those that contain 'Areal'
length(point_increment_files) # 12, as expected
point_increment_files
# [1] "CS_Increments.csv" "ECnorth_Increments.csv" "ECsouth_Increments.csv" "FLTeast_Increments .csv" "FLTwest_Increments.csv"
# [6] "MB_Increments.csv" "MN_Increments.csv" "R_Increments.csv" "Rwest_Increments.csv" "SSmainland_Increments.csv"
# [11] "SStas_Increments.csv" "WT_Increments.csv"
# List the AllStats files
all_stats_files <- str_subset(list.files('../data'), 'AllStats') # Extract files that contain 'AllStats'
point_stats_files <- all_stats_files[!str_detect(all_stats_files, 'Areal')]
point_stats_files
# [1] "CS_AllStats.csv" "ECnorth_AllStats.csv" "ECsouth_AllStats.csv" "FLTeast_AllStats .csv" "FLTwest_AllStats.csv" "MB_AllStats.csv"
# [7] "MN_AllStats.csv" "R_AllStats.csv" "Rwest_AllStats .csv" "SSmainland_AllStats.csv" "SStas_AllStats.csv" "WT_AllStats.csv"
# Function to read an AllStats file and return the storm information as a data frame
Get_burst_stats <- function(fname) {
  # Read in all the storm records, one storm per line
  #fname <- "CS_AllStats.csv"
  burst_stats <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)
  # Split the strings at commas
  burst_stats <- str_split(burst_stats, ',')
  # Bind the rows together and convert to a data frame
  burst_stats <- as_data_frame(rlang::exec('rbind', !!!burst_stats))
  # Name the columns
  names(burst_stats) <- c('eventID', 'region', 'region_source',
                          'burst_duration', 'burst_loading', 'original_burst_depth',
                          'aep_window', 'aep_source', 'burst_start', 'burst_end',
                          'event_ref_number', 'pluvio_ref', 'official_gauge',
                          'lat', 'lon')
  burst_stats
}
# Try on one file
Get_burst_stats("MN_AllStats.csv")
# Get stats for all regions
all_regions_stats <- map_df(.x = point_stats_files, .f = Get_burst_stats)
all_regions_stats
# Function to calculate the NUI (non-uniformity index) for a rainfall pattern
Chisquare_p <- function(p) {
  p <- p[-(1:5)]                               # Remove the 5 header fields
  p <- as.numeric(p)                           # Convert to numeric
  p_e <- rep(100/length(p), times = length(p)) # Expected percentage per increment if uniform
  chi <- sum(((p - p_e)^2)/p_e)                # Chi-square statistic: sum((o - e)^2 / e)
  # Work with the log of the chi-square p-value (log.p = TRUE) because some
  # values are very small, then take the log again to keep the index a
  # reasonable size: NUI = -log(-log(P(X <= chi)))
  -log(-pchisq(chi, df = length(p) - 1, log.p = TRUE))
}
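# A quick sanity check of the NUI (illustration only, not part of the original
# analysis; the header values are made up). The first five elements mimic the
# fields (eventID, duration, time step, region, AEP) that Chisquare_p() discards.
hdr <- c('999', '360', '30', 'Test region', 'frequent')
near_uniform <- c(10, 8, 9, 8, 8, 9, 8, 8, 8, 8, 8, 8) # percentages, sum to 100
single_spike <- c(100, rep(0, 11))                      # all rain in one increment
Chisquare_p(c(hdr, near_uniform)) # small NUI: pattern is close to uniform
Chisquare_p(c(hdr, single_spike)) # large NUI: pattern is highly non-uniform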
# Function to read in patterns, calculate NUI and return a data frame with information:
# eventID, duration_min, duration_hr, interval_min, region, NUI
# fname is the file name of the increments file
Calc_NUI_for_patterns <- function(fname) {
  #fname <- "CS_Increments.csv"
  # Read in all the patterns, one pattern per line
  pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)
  #pat
  # Remove trailing commas
  pat <- str_replace(pat, '[,]*$', '')
  #pat
  # Split the strings at commas
  pat <- str_split(pat, ',')
  #pat
  # Calculate the chi-square statistic for each pattern
  chisq_stat <- unlist(map(.x = pat, .f = Chisquare_p))
  #barplot(sort(chisq_stat))
  # Function to grab the first 5 fields from a temporal pattern
  # The first 5 entries are:
  # 1. EventID
  # 2. Duration (minutes)
  # 3. Time step
  # 4. Region
  # 5. AEP
  First_five <- function(p) {
    p[1:5]
  }
  names(pat) <- 1:length(pat)                   # pat needs to have names for map_df to work
  pat_db <- map_df(.x = pat, .f = First_five)   # get first 5 fields
  pat_db <- as_data_frame(t(as.matrix(pat_db))) # need to transpose to convert to data frame
  names(pat_db) <- c('eventID', 'duration_min', 'time_step', 'region', 'aep')
  # Add the non-uniformity index value NUI
  pat_db <- pat_db %>%
    mutate(NUI = chisq_stat) %>%
    mutate(duration_min = as.integer(duration_min)) %>%
    mutate(row_n = 1:length(pat)) %>%
    mutate(duration_hr = as.integer(duration_min/60)) %>%
    select(row_n, eventID, duration_min, duration_hr, time_step, region, aep, NUI)
  # Return the data for each region, most non-uniform patterns first
  pat_db %>%
    arrange(desc(NUI))
}
# Try on one file
Calc_NUI_for_patterns("CS_Increments.csv")
# Process all increment files to create a summary for all patterns
all_point_patterns_sum <- map_df(.x = point_increment_files, .f = Calc_NUI_for_patterns)
all_point_patterns_sum <- all_point_patterns_sum %>% arrange(desc(NUI))
all_point_patterns_sum
# Attach storm data to pattern summary
all_point_patterns_sum <- all_point_patterns_sum %>%
  left_join(all_regions_stats, by = c("region" = "region", "eventID" = "eventID")) %>%
  arrange(desc(NUI))
all_point_patterns_sum %>% View
all_point_patterns_sum %>%
  select(2:9) %>% View
# row_n eventID duration_min duration_hr time_step region aep NUI region_source
# <int> <chr> <int> <int> <chr> <chr> <chr> <dbl> <chr>
# 1 665 1208 8640 144 180 Rangelands (West) frequent 419. Rangelands (West)
# 2 706 1182 10080 168 180 Rangelands (West) intermediate 416. Rangelands (West)
# 3 697 1973 10080 168 180 Rangelands frequent 375. Rangelands
# 4 676 1183 8640 144 180 Rangelands intermediate 357. Rangelands
# 5 676 1181 8640 144 180 Rangelands (West) intermediate 357. Rangelands (West)
# 6 695 2870 10080 168 180 Central Slopes frequent 342. Central Slopes
# 7 703 1083 10080 168 180 Rangelands intermediate 341. Rangelands
# 8 705 1079 10080 168 180 Rangelands (West) intermediate 341. Rangelands (West)
# 9 696 1971 10080 168 180 Rangelands frequent 338. Rangelands
# 10 673 1082 8640 144 180 Rangelands intermediate 326. Rangelands
# 11 673 1078 8640 144 180 Rangelands (West) intermediate 326. Rangelands (West)
# 12 691 7838 10080 168 180 S-SW Flatlands (East) frequent 305. S-SW Flatlands (East)
# 13 662 7837 8640 144 180 S-SW Flatlands (East) frequent 303. S-SW Flatlands (East)
# 14 700 1979 10080 168 180 Rangelands frequent 284. Rangelands
# 15 633 7836 7200 120 180 S-SW Flatlands (East) frequent 283. S-SW Flatlands (East)
# 16 644 1081 7200 120 180 Rangelands intermediate 283. Rangelands
# 17 644 1077 7200 120 180 Rangelands (West) intermediate 283. Rangelands (West)
# 18 699 5767 10080 168 180 East Coast (North) frequent 279. East Coast (North)
# 19 698 1264 10080 168 180 Rangelands (West) frequent 278. Rangelands (West)
# 20 699 2874 10080 168 180 Central Slopes frequent 274. Central Slopes
# 21 694 1263 10080 168 180 Rangelands frequent 272. Rangelands
#saveRDS(all_point_patterns_sum, file = file.path('../data', 'all_point_patterns_sum.rds'))
# Process patterns for graphing -------------------------------------------
# Function to create a list of patterns
# Patterns need to be stored in a list rather than a data frame because the number of increments varies
List_patterns <- function(fname) {
  pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)
  # Strip off trailing commas
  pat <- str_replace(pat, '[,]*$', '')
  # Split the strings at commas
  pat <- str_split(pat, ',')
  # Get the region names and the event IDs so that we can name list
  # elements and then pull individual patterns out.
  # The region name is the 4th element and the eventID number is the first
  patIDs <- map_chr(.x = 1:length(pat), .f = function(i) str_c(pat[[i]][4], pat[[i]][1], sep = '_'))
  patIDs <- str_to_lower(patIDs)              # convert to lower case
  patIDs <- str_replace_all(patIDs, ' ', '_') # convert to snake case
  names(pat) <- patIDs
  return(pat)
}
# Apply to all increments files
all_point_increments <- map(.x = point_increment_files, .f = List_patterns)
all_point_increments
# This produces a list of 12 lists
# Use purrr::flatten to make into a single list
all_point_increments <- purrr::flatten(all_point_increments)
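# Quick illustration of the flatten step (not part of the analysis):
# flatten() removes one level of nesting and keeps the inner names.
purrr::flatten(list(list(a = 1), list(b = 2))) # list(a = 1, b = 2)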
#head(all_point_increments)
#tail(all_point_increments)
# What are the region names
all_names <- names(all_point_increments)
unique(str_replace(all_names, '[0-9]+', '')) # remove the trailing numbers so we are left with just the region names
# [1] "central_slopes_" "east_coast_(north)_" "east_coast_(south)_" "s-sw_flatlands_(east)_" "s-sw_flatlands_(west)_"
# [6] "murray_basin_" "monsoonal_north_" "rangelands_" "rangelands_(west)_" "southern_slopes_(mainland)_"
# [11] "southern_slopes_(tasmania)_" "wet_tropics_"
# Save all_point_increments file for use in other scripts
#saveRDS(all_point_increments, file = file.path('../data','all_point_increments.rds'))
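# As a quick check of the naming scheme (illustration only): individual patterns
# can be pulled out by their snake-case region_eventID names. Event 2870 is one
# of the Central Slopes patterns in the summary table above.
str_subset(names(all_point_increments), '^central_slopes_') %>% head
all_point_increments[['central_slopes_2870']]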
# Function to graph a pattern given the region name and event id e.g. central_slopes_3565
Graph_pattern <- function(pat_name_id) {
  pattern_ids <- names(all_point_increments)
  # If only a pattern number is provided, change to the region_number format,
  # i.e. find the pattern in the names of the all_point_increments list.
  # Anchor the match to the end of the name so e.g. 892 does not also match 8920.
  if(is.numeric(pat_name_id)) {
    pat_name_id <- pattern_ids[str_detect(pattern_ids, str_c('_', pat_name_id, '$'))][1]
  }
  pat_data <- all_point_increments[[pat_name_id]]
  my_eventID <- pat_data[1]
  my_duration <- pat_data[2]
  my_increments <- pat_data[3]
  my_region <- pat_data[4]
  my_aep <- pat_data[5]
  my_breaks <- seq(0, 60, by = 5) # The maximum number of increments is 56
  # so setting this to 60 will always work
  my_title <- sprintf('Point Temporal Pattern %s', my_eventID)
  my_subtitle <- sprintf('Region = %s, AEP = %s, \nDuration = %s min (%.0f hour), %i increments of %s min',
                         my_region, my_aep, my_duration, as.numeric(my_duration)/60,
                         as.numeric(my_duration)/as.numeric(my_increments), my_increments)
  # Remove header information, just leaving the percentages
  pat_data <- as.numeric(pat_data[-1:-5])
  # Create data frame for plotting
  pat_data_df <- data_frame(increments = 1:length(pat_data), percentages = pat_data)
  p <- pat_data_df %>%
    ggplot(aes(x = increments, y = percentages)) +
    geom_col(fill = 'blue', colour = 'black') +
    scale_y_continuous(name = 'Percentage in each increment') +
    scale_x_continuous(breaks = my_breaks) +
    labs(title = my_title,
         subtitle = my_subtitle) +
    theme_grey(base_size = 5)
  # Five minor breaks per major break
  # https://stackoverflow.com/questions/39691591/ggplot2-integer-multiple-of-minor-breaks-per-major-break
  majors <- ggplot_build(p)$layout$panel_params[[1]]$x.major_source
  multiplier <- 5
  minors <- seq(from = min(majors),
                to = max(majors),
                length.out = ((length(majors) - 1) * multiplier) + 1)
  p <- p + scale_x_continuous(name = 'Increments', breaks = my_breaks, minor_breaks = minors)
  suppressMessages(print(p))
}
# Create graphs
p1 <- Graph_pattern(1208)
p2 <- Graph_pattern(1182)
p3 <- Graph_pattern(1973)
p4 <- Graph_pattern(1183)
p <- p1 + p2 + p3 + p4
ggsave(file.path('../figures', 'point_patterns_1-4.png'), p, width = 4, height = 3)
p5 <- Graph_pattern(1181)
p6 <- Graph_pattern(2870)
p7 <- Graph_pattern(1083)
p8 <- Graph_pattern(1079)
p <- p5 + p6 + p7 + p8
p
ggsave(file.path('../figures', 'point_patterns_5-8.png'), p, width = 4, height = 3)
# Peaks 24 hours apart ----------------------------------------------------
peak_diff_pattern <- function(pat_name_id) {
  pat_data <- all_point_increments[[pat_name_id]]
  my_eventID <- pat_data[1]
  my_duration <- pat_data[2]
  my_increments <- pat_data[3]
  my_region <- pat_data[4]
  my_aep <- pat_data[5]
  pat_data <- as.numeric(pat_data[-1:-5])
  stopifnot(abs(sum(pat_data) - 100) < 1e-6) # percentages should sum to 100
  pat_data_df <- data_frame(inc_num = 1:length(pat_data),
                            tt = as.numeric(my_increments) * inc_num, # time at end of each increment (min)
                            pc = as.numeric(pat_data))
  # Sort the increments by rainfall percentage and report the two largest
  # peaks and the time between them
  pat_data_df %>%
    arrange(desc(pc)) %>%
    summarise(eventID = my_eventID,
              peak_1 = pc[1],
              peak_2 = pc[2],
              diff_time = abs(tt[1] - tt[2]),
              diff_time_hr = diff_time/60)
}
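# Worked toy example of the peak-difference logic (illustration only, made-up
# numbers): with 30-minute increments, peaks in increments 10 and 58 are
# (58 - 10) * 30 = 1440 min = 24 hours apart.
toy <- data_frame(inc_num = 1:60, tt = 30 * inc_num, pc = 45/58)
toy$pc[c(10, 58)] <- c(30, 25) # two big peaks; the other 58 increments share the remaining 45%
toy %>%
  arrange(desc(pc)) %>%
  summarise(peak_1 = pc[1], peak_2 = pc[2],
            diff_time = abs(tt[1] - tt[2]), diff_time_hr = diff_time/60)
# peak_1 = 30, peak_2 = 25, diff_time = 1440, diff_time_hr = 24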
# process all patterns
diffs <- map_df(.x = 1:length(all_point_increments), .f = peak_diff_pattern)
diffs
# Search for patterns where the peaks are large and 24 hours apart
diffs %>%
  filter(diff_time_hr == 24) %>%
  filter(peak_1 > 15 & peak_2 > 15) %>%
  arrange(desc(peak_1), desc(peak_2)) %>% View
diffs %>%
  filter(diff_time_hr == 24) %>%
  filter(peak_1 > 15 & peak_2 > 15) %>%
  arrange(desc(peak_1), desc(peak_2))
# # A tibble: 14 x 5
# eventID peak_1 peak_2 diff_time diff_time_hr
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 5636 31.0 30.6 1440 24
# 2 5679 29.6 15.2 1440 24
# 3 5635 27.1 22.7 1440 24
# 4 7695 26.5 15.6 1440 24
# 5 3561 26.1 19.3 1440 24
# 6 9006 25.3 15.2 1440 24
# 7 7696 23.4 15.0 1440 24
# 8 6343 21.6 16.5 1440 24
# 9 8386 19.5 15.4 1440 24
# 10 6376 19 17.0 1440 24
# 11 892 18.2 17.8 1440 24
# 12 4142 17.6 15.9 1440 24
# 13 7729 17.5 16.5 1440 24
# 14 8368 17.0 16.0 1440 24
# Check the statistics for these events
ids <- diffs %>%
  filter(diff_time_hr == 24) %>%
  filter(peak_1 > 15 & peak_2 > 15) %>%
  arrange(desc(peak_1), desc(peak_2)) %>%
  slice(1:10) %>%
  select(eventID) %>% pull
all_point_patterns_sum %>%
  filter(eventID %in% ids) %>% View
p1 <- Graph_pattern(5636)
p2 <- Graph_pattern(5679)
p3 <- Graph_pattern(5635)
p4 <- Graph_pattern(7695)
p <- p1 + p2 + p3 + p4