Last active
April 2, 2019 21:35
-
-
Save TonyLadson/d203a2c41cf3ac124f128edba0fa8785 to your computer and use it in GitHub Desktop.
Code to analyse temporal patterns from the Australian Rainfall and Runoff data hub. See bloghttps://tonyladson.wordpress.com/2019/04/02/check-arr-temporal-patterns-before-you-use-them/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT | |
# Packages ---------------------------------------------------------------- | |
devtools::install_github("thomasp85/patchwork") | |
library(tidyverse) | |
library(stringr) | |
library(rlang) | |
library(ggmap) | |
library(patchwork) | |
# Load data for Figure 2 in blog and plot ------------------------------------------ | |
list.files('../data') | |
pat <- read_csv(file = file.path('../data', "Areal_pattern_7200min_200km2_SS.csv" ), skip = 1 ) | |
glimpse(pat) | |
max(pat) | |
#[1] 48.82 | |
ann_text <- data.frame(time = 30, percentage = 35, lab = "ID = 6884", hjust = 'left', | |
pattern_no = factor('X7', levels = str_c('X', 1:10), ordered = TRUE )) | |
p <- pat %>% | |
mutate(time = 1:nrow(pat)) %>% | |
gather(-time, key = 'pattern_no', value = 'percentage') %>% | |
mutate(pattern_no = factor(pattern_no, levels = str_c('X', 1:10), ordered = TRUE )) %>% | |
ggplot(aes(time, percentage)) + | |
geom_col(fill = 'blue', colour = 'black') + | |
facet_wrap(~pattern_no) + | |
geom_text(data = ann_text,aes(label = lab), size = 2) + | |
scale_y_continuous(name = 'Percentage in each increment') + | |
scale_x_continuous(name = '') + | |
labs(title = 'Areal Temporal Patterns', | |
subtitle = 'Southern Slopes, duration = 7200 min (120 hours), area = 200 sq-km, \n40 increments of 180 min')+ | |
theme_gray(base_size = 7) | |
p | |
ggsave(file.path('../figures', 'southern_slopes_6884.png'), plot = p, width = 4, height = 3) | |
# Download Areal pattern increment files and all states files ----------------------------------------------------- | |
# There are 12 regions | |
# 1. Rangelands West | |
# 2. Wet Tropics | |
# 3. Rangelands | |
# 4. Central Slopes | |
# 5. Monsoonal North | |
# 6. Murray Basin | |
# 7. East Coast North | |
# 8. East Coast South | |
# 9. Southern Slopes Mainland | |
# 10. Southern Slopes Tasmania | |
# 11. East flatlands | |
# 12. West flatlands | |
# Two files for each region | |
# An increments file - which has the temporal patterns | |
# An Allstats file which as information on the storms that were used to generate the temporal patterns. | |
# Get all the Areal Increment files | |
increment_files <- list.files('../data') | |
increment_files <- str_subset(increment_files, '(Areal).*(Increments)') # Extract files that contain 'Areal' and 'Increments' | |
length(increment_files) # 12, as expected | |
increment_files | |
# [1] "Areal_CS_Increments.csv" "Areal_ECnorth_Increments.csv" "Areal_ECsouth_Increments.csv" "Areal_FLTeast_Increments.csv" | |
# [5] "Areal_FLTwest_Increments.csv" "Areal_MB_Increments.csv" "Areal_MN_Increments.csv" "Areal_R_Increments.csv" | |
# [9] "Areal_Rwest_Increments.csv" "Areal_SSmainland_Increments.csv" "Areal_SStas_Increments.csv" "Areal_WT_Increments.csv" | |
# list the All Stats files | |
str_subset(list.files('../data'), 'AllStats') # Extract files that contain 'AllStats' | |
# [1] "Areal_CS_AllStats.csv" "Areal_ECnorth_AllStats.csv" "Areal_ECsouth_AllStats.csv" | |
# [4] "Areal_FLTeast_AllStats.csv" "Areal_FLTwest_AllStats.csv" "Areal_MB_AllStats.csv" | |
# [7] "Areal_MN_AllStats.csv" "Areal_R_AllStats.csv" "Areal_Rwest_AllStats.csv" | |
# [10] "Areal_SSmainland_AllStats.csv" "Areal_SStas_AllStats.csv" "Areal_WT_AllStats.csv" | |
# link file names to region names | |
allstats_files <- set_names(c("Areal_CS_AllStats.csv", | |
"Areal_ECnorth_AllStats.csv", | |
"Areal_ECsouth_AllStats.csv", | |
"Areal_FLTeast_AllStats.csv", | |
"Areal_FLTwest_AllStats.csv", | |
"Areal_MB_AllStats.csv", | |
"Areal_MN_AllStats.csv", | |
"Areal_R_AllStats.csv", | |
"Areal_Rwest_AllStats.csv", | |
"Areal_SSmainland_AllStats.csv", | |
"Areal_SStas_AllStats.csv", | |
"Areal_WT_AllStats.csv"), | |
c('central_slopes', | |
'east_coast_north', | |
'east_coast_south', | |
'east_flatlands', | |
'west_flatlands', | |
'murray_basin', | |
'monsoonal_north', | |
'rangelands', | |
'rangelands_west', | |
'southern_slopes_mainland', | |
'southern_slopes_tas', | |
'wet_tropics')) | |
# Function to download an all stats file, convert to a dataframe and store the whole lot in a data frame | |
Get_storm_stats <- function(fname) { | |
# Read in all the patterns | |
# One pattern per line | |
#fname <- "Areal_CS_AllStats.csv" | |
storm_stats <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L) | |
# Split the strings at commas | |
storm_stats <- str_split(storm_stats, ',') | |
# Convert to data frame | |
storm_stats <- as_data_frame(rlang::exec('rbind', !!!storm_stats)) | |
# name the columns | |
names(storm_stats) <- c('eventID','region', 'region_source', | |
'duration', 'timestep', 'original_burst_depth_mm', | |
'area','aspect_ratio', 'rotation', | |
'burst_start_time', 'lat', 'lon') | |
storm_stats | |
} | |
# Get stats for all events | |
all_regions_stats <- map_df(.x = allstats_files, .f = Get_storm_stats) | |
all_regions_stats | |
# Function to calcuate the NUI | |
Chisquare_p <- function(p) { | |
p <- p[-(1:5)] # Remove the first 5 elements | |
p <- as.numeric(p) # Convert to numeric | |
p_e <- rep(100/length(p), times = length(p)) # Expected | |
chi <- sum(((p - p_e)^2)/p_e) # o - e | |
-log(-pchisq(chi, df = length(p) - 1, log.p = TRUE)) | |
# This transformation provides numbers that are in a range that are easy to work with | |
} | |
# Function to read in patterns, calculate NUI and return a data frame with information: | |
# eventID, duration_min, duration_hr, interval_min, region, NUI | |
# fname is the file name of the increments file | |
Find_bad_patterns <- function(fname) { | |
# Read in all the patterns | |
# One pattern per line | |
pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L) | |
# Split the strings at commas | |
pat <- str_split(pat, ',') | |
# Calculate the Chisquare statistic for each pattern | |
chisq_stat <- unlist(map(.x = pat, .f = Chisquare_p)) | |
# Function to grab the first 5 fields from a temporal pattern | |
# The first 5 entres are: | |
# 1. EventID | |
# 2. Duration (minutes) | |
# 3. Interval length (min) | |
# 4. Region | |
# 5. Area | |
First_five <- function(p) { | |
p <- p[1:5] | |
} | |
names(pat) <- 1:length(pat) # pat needs to have names for map_df to work | |
pat_db <- map_df(.x = pat, .f= First_five) # get first 5 fields | |
pat_db <- as_data_frame(t(as.matrix(pat_db))) # need to transpose to convert to data frame | |
names(pat_db) <- c('eventID', 'duration_min', 'interval_min', 'region', 'area') | |
# Add the non_uniformity index value NUI | |
pat_db <- pat_db %>% | |
mutate(NUI = chisq_stat) %>% | |
mutate(duration_min = as.integer(duration_min)) %>% | |
mutate(row_n = 1:length(pat)) %>% | |
mutate(duration_hr = as.integer(duration_min/60)) %>% | |
select(row_n, eventID, duration_min, duration_hr, interval_min, region, NUI) | |
# return the data for each region | |
pat_db %>% | |
arrange(desc(NUI)) | |
} | |
# Process all increment files to creat a summary for all patterns | |
all_patterns_sum <- map_df(.x = increment_files, .f = Find_bad_patterns ) | |
all_patterns_sum <- all_patterns_sum %>% arrange(desc(NUI)) | |
#Attach storm data to pattern summary | |
all_patterns_sum <- all_patterns_sum %>% | |
left_join(all_regions_stats, by = c("region" = "region", "eventID" = "eventID")) %>% | |
arrange(desc(NUI)) | |
#all_patterns_sum %>% View | |
all_patterns_sum %>% print(n = 21) | |
# Map locations ---------------------------------------------------------- | |
# Plot a graph of locations | |
# | |
# First n locations | |
my_rows = 1:16 | |
storm_map_df <- all_patterns_sum %>% | |
slice(my_rows) %>% | |
mutate_at(.vars = c('lat','lon'), .funs = as.numeric) %>% | |
add_column(num = my_rows) | |
# You need an API key for this to work see ?register_google | |
# Calculating the scale | |
# Source function that calculates distance (UTM) from degrees | |
source("https://gist.githubusercontent.com/TonyLadson/f37aab3e2ef517188a7f27166307c985/raw/0822970769bc90fcc28052a91b375399d665286e/UTM2deg.R") | |
# These eastings and northings are from the Jerangle area where the least uniform | |
# patterns occur | |
UTM2deg(easting = 712748.88, northing = 6021016.41, zone = 55, format_out = 'deg' ) | |
# lat lon | |
# 1 -35.93206 149.3583 | |
#If we move 20 km how many degrees? | |
UTM2deg(easting = 712748.88 + 20000, northing = 6021016.41, zone = 55, format_out = 'deg' ) | |
# lat lon | |
# 1 -35.92751 149.5798 | |
# Different in lon | |
149.5798 - 149.3583 # 0.22 | |
# Difference in lat | |
-35.93206 - -35.92751 #-0.00455 | |
# So 20 km is about 0.22 deg, we can use this to create an approximate scale | |
# Get segment of map of Australia for area south of Canberra | |
aust_map <- ggmap(get_map(location = c(lon =149.3375 , lat = -35.8875), zoom = 10), extent = 'device') | |
aust_map | |
# Specification of label locations | |
my_nudge_x = c(0, 0.02, 0, -0.01, 0.01, -0.01, 0.01, -0.01, 0, 0.015, 0, 0.015, 0, 0.02, 0, -0.02) | |
my_nudge_y = c(0.02, 0, -0.02, 0, 0.01, 0, 0, 0, 0.01, 0, -0.01, 0, 0.01, 0, -0.01, -0.01 ) | |
# Add points | |
# One point is in Queensland which won't be shown | |
p <- aust_map + | |
geom_point(aes(x = lon, y = lat), colour = 'red', size = 0.4, data = storm_map_df) + | |
geom_text(aes(x = lon, y = lat, label = my_rows), data = storm_map_df, size = 1.9, check_overlap = FALSE, | |
nudge_x = my_nudge_x, nudge_y = my_nudge_y) + | |
theme_minimal(base_size=5) + | |
labs(x = NULL, y = NULL) + | |
geom_segment(x = 149.0, xend = 149.0, y = -35.8, yend = -35.6, colour = 'black', # North arrow | |
arrow = arrow(length = unit(0.02, "npc"), type = 'closed')) + | |
geom_segment(x = 149, xend = 149.22 , y =-36.1 , yend =-36.1 , colour = 'black', # Scale | |
arrow = arrow(angle = 90, ends = 'both', length = unit(0.02, "npc"))) + | |
annotate(geom = 'text', x = 149.11, y = -36.08, label = '20 km', size = 1.5) | |
p | |
# Save for webpage | |
ggsave(file.path('../figures', 'storm_loc_map.png'), p, width = 4, height = 4) | |
# Process patterns for graphing ------------------------------------------- | |
# Function to create list of patterns | |
# Patterns need to be stored in a list rather than a data frame because the number of increments vary | |
List_patterns <- function(fname) { | |
pat <- read_lines(file = file.path('../data', fname ), skip = 1, n_max = -1L) | |
# Split the strings at commas | |
pat <- str_split(pat, ',') | |
# Get the region names and the event IDs so that we can name list | |
# elements and then pull individual patterns out. | |
# The region names are the 4th element and the pattern number are the first | |
patIDs <- map_chr(.x = 1:length(pat), .f = function(i)(str_c (pat[[i]][4], pat[[i]][1], sep = '_'))) | |
patIDs <- str_to_lower(patIDs) # convert to lower case | |
patIDs <- str_replace_all(patIDs, ' ', '_') # convert to snake case | |
names(pat) <- patIDs | |
return(pat) | |
} | |
# Apply to all increments files | |
all_increments <- map(.x = increment_files, .f = List_patterns) | |
# This produces a list of 12 lists | |
# Use purrr::flatten to make into a single list | |
all_increments <- purrr::flatten(all_increments) | |
#head(all_increments) | |
#tail(all_increments) | |
# What are the region names | |
all_names <- names(all_increments) | |
unique(str_replace(all_names, '[0-9]+', '')) | |
# [1] "central_slopes_" "east_coast_(north)_" "east_coast_(south)_" "s-sw_flatlands_(east)_" "s-sw_flatlands_(west)_" | |
# [6] "murray_basin_" "monsoonal_north_" "rangelands_" "rangelands_(west)_" "southern_slopes_(mainland)_" | |
# [11] "southern_slopes_(tasmania)_" "wet_tropics_" | |
# Function to graph a pattern given the region name and event id e.g. central_slopes_3565 | |
Graph_pattern <- function(pat_name_id) { | |
pat_data <- all_increments[[pat_name_id]] | |
my_eventID <- pat_data[1] | |
my_duration <- pat_data[2] | |
my_increments <- pat_data[3] | |
my_region <- pat_data[4] | |
my_area <- pat_data[5] | |
my_breaks <- seq(0, 60, by = 5) # The maximum number of increments is 56 | |
# so setting this to 60 will always work | |
my_title <- sprintf('Areal Temporal Pattern %s',my_eventID) | |
my_subtitle <- sprintf('Region = %s, Area = %s km-squared, \nDuration = %s min (%.0f hour), %i increments of %s min', | |
my_region, my_area, my_duration, as.numeric(my_duration)/60, as.numeric(my_duration)/as.numeric(my_increments), my_increments) | |
# remove header information just leaving numbers | |
pat_data <- as.numeric(pat_data[-1:-5] ) | |
# Create dataframe for plotting | |
pat_data_df <- data_frame(increments = 1:length(pat_data), percentages = pat_data ) | |
p <- pat_data_df %>% | |
ggplot(aes(x = increments, y = pat_data)) + | |
geom_col(fill = 'blue', colour = 'black') + | |
scale_y_continuous(name = 'Percentage in each increment') + | |
scale_x_continuous(breaks = my_breaks) + | |
labs(title = my_title, | |
subtitle = my_subtitle) + | |
theme_grey(base_size = 5) | |
# five minors per major | |
#https://stackoverflow.com/questions/39691591/ggplot2-integer-multiple-of-minor-breaks-per-major-break | |
majors <- ggplot_build(p)$layout$panel_params[[1]]$x.major_source | |
multiplier <- 5 | |
minors <- seq(from = min(majors), | |
to = max(majors), | |
length.out = ((length(majors) - 1) * multiplier) + 1) | |
p <- p + scale_x_continuous(name = 'Increments', breaks = my_breaks, minor_breaks = minors) | |
suppressMessages(print(p)) | |
} | |
# Create graphs for figures 4 and 5 in the blog | |
p1 <- Graph_pattern('murray_basin_5901') | |
p2 <-Graph_pattern('murray_basin_5644') | |
p3 <-Graph_pattern('murray_basin_5731') | |
p4 <-Graph_pattern('murray_basin_5898') | |
p <- p1 + p2 + p3 + p4 | |
p | |
ggsave(file.path('../figures', 'next4_patterns.png'), p, width = 4, height = 3) | |
p5 <- Graph_pattern('murray_basin_5810') | |
p6 <- Graph_pattern('central_slopes_4170') | |
p7 <- Graph_pattern('murray_basin_5822') | |
p8 <- Graph_pattern('monsoonal_north_5004') | |
p <- p5 + p6 + p7 + p8 | |
p | |
ggsave(file.path('../figures', 'next5_8_patterns.png'), p, width = 4, height = 3) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment