Code to analyse temporal patterns from the Australian Rainfall and Runoff data hub. See blog post: https://tonyladson.wordpress.com/2019/04/02/check-arr-temporal-patterns-before-you-use-them/
# All data files are available at https://drive.google.com/open?id=1-RSPFSuAbOFZRIivO8YHfNlBJijwcMRT
# Packages ----------------------------------------------------------------
# patchwork was not on CRAN when this was written, so install from GitHub
devtools::install_github("thomasp85/patchwork")
library(tidyverse)
library(stringr)
library(rlang)
library(ggmap)
library(patchwork)
# Load data for Figure 2 in the blog and plot ------------------------------------------
list.files('../data')
pat <- read_csv(file = file.path('../data', "Areal_pattern_7200min_200km2_SS.csv" ), skip = 1 )
glimpse(pat)
max(pat)
#[1] 48.82
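# Sanity checks: one column per pattern, one row per time increment.
# (Assumption: this file holds 10 patterns of 40 x 180-min increments,
# consistent with the plot subtitle below.)
dim(pat)  # expect 40 rows x 10 columns
colSums(pat)  # each pattern should sum to about 100 per cent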
ann_text <- data.frame(time = 30, percentage = 35, lab = "ID = 6884", hjust = 'left',
                       pattern_no = factor('X7', levels = str_c('X', 1:10), ordered = TRUE))

p <- pat %>%
  mutate(time = 1:nrow(pat)) %>%
  gather(-time, key = 'pattern_no', value = 'percentage') %>%
  mutate(pattern_no = factor(pattern_no, levels = str_c('X', 1:10), ordered = TRUE)) %>%
  ggplot(aes(time, percentage)) +
  geom_col(fill = 'blue', colour = 'black') +
  facet_wrap(~pattern_no) +
  geom_text(data = ann_text, aes(label = lab), size = 2) +
  scale_y_continuous(name = 'Percentage in each increment') +
  scale_x_continuous(name = '') +
  labs(title = 'Areal Temporal Patterns',
       subtitle = 'Southern Slopes, duration = 7200 min (120 hours), area = 200 sq-km, \n40 increments of 180 min') +
  theme_gray(base_size = 7)
p
ggsave(file.path('../figures', 'southern_slopes_6884.png'), plot = p, width = 4, height = 3)
# Download Areal pattern Increments files and AllStats files -----------------------------------------------------
# There are 12 regions
# 1. Rangelands West
# 2. Wet Tropics
# 3. Rangelands
# 4. Central Slopes
# 5. Monsoonal North
# 6. Murray Basin
# 7. East Coast North
# 8. East Coast South
# 9. Southern Slopes Mainland
# 10. Southern Slopes Tasmania
# 11. East flatlands
# 12. West flatlands
# Two files for each region:
# An Increments file - which has the temporal patterns
# An AllStats file - which has information on the storms that were used to generate the temporal patterns
# Get all the Areal Increment files
increment_files <- list.files('../data')
increment_files <- str_subset(increment_files, '(Areal).*(Increments)') # Extract files that contain 'Areal' and 'Increments'
length(increment_files) # 12, as expected
increment_files
# [1] "Areal_CS_Increments.csv" "Areal_ECnorth_Increments.csv" "Areal_ECsouth_Increments.csv" "Areal_FLTeast_Increments.csv"
# [5] "Areal_FLTwest_Increments.csv" "Areal_MB_Increments.csv" "Areal_MN_Increments.csv" "Areal_R_Increments.csv"
# [9] "Areal_Rwest_Increments.csv" "Areal_SSmainland_Increments.csv" "Areal_SStas_Increments.csv" "Areal_WT_Increments.csv"
# List the AllStats files
str_subset(list.files('../data'), 'AllStats') # Extract files that contain 'AllStats'
# [1] "Areal_CS_AllStats.csv" "Areal_ECnorth_AllStats.csv" "Areal_ECsouth_AllStats.csv"
# [4] "Areal_FLTeast_AllStats.csv" "Areal_FLTwest_AllStats.csv" "Areal_MB_AllStats.csv"
# [7] "Areal_MN_AllStats.csv" "Areal_R_AllStats.csv" "Areal_Rwest_AllStats.csv"
# [10] "Areal_SSmainland_AllStats.csv" "Areal_SStas_AllStats.csv" "Areal_WT_AllStats.csv"
# link file names to region names
allstats_files <- set_names(c("Areal_CS_AllStats.csv",
"Areal_ECnorth_AllStats.csv",
"Areal_ECsouth_AllStats.csv",
"Areal_FLTeast_AllStats.csv",
"Areal_FLTwest_AllStats.csv",
"Areal_MB_AllStats.csv",
"Areal_MN_AllStats.csv",
"Areal_R_AllStats.csv",
"Areal_Rwest_AllStats.csv",
"Areal_SSmainland_AllStats.csv",
"Areal_SStas_AllStats.csv",
"Areal_WT_AllStats.csv"),
c('central_slopes',
'east_coast_north',
'east_coast_south',
'east_flatlands',
'west_flatlands',
'murray_basin',
'monsoonal_north',
'rangelands',
'rangelands_west',
'southern_slopes_mainland',
'southern_slopes_tas',
'wet_tropics'))
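# Quick sanity check that all 12 AllStats files named above are actually
# present in the data directory
all(allstats_files %in% list.files('../data'))  # expect TRUE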
# Function to read an AllStats file and convert it to a data frame
Get_storm_stats <- function(fname) {
  # Read in all the storm statistics, one storm per line
  # fname <- "Areal_CS_AllStats.csv"  # for testing
  storm_stats <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)
  # Split the strings at commas
  storm_stats <- str_split(storm_stats, ',')
  # Bind the rows together and convert to a data frame
  storm_stats <- as_data_frame(rlang::exec('rbind', !!!storm_stats))
  # Name the columns
  names(storm_stats) <- c('eventID', 'region', 'region_source',
                          'duration', 'timestep', 'original_burst_depth_mm',
                          'area', 'aspect_ratio', 'rotation',
                          'burst_start_time', 'lat', 'lon')
  storm_stats
}
# Get stats for all events
all_regions_stats <- map_df(.x = allstats_files, .f = Get_storm_stats)
all_regions_stats
# Function to calculate the non-uniformity index (NUI)
Chisquare_p <- function(p) {
  p <- p[-(1:5)]  # Remove the first 5 elements (header information)
  p <- as.numeric(p)  # Convert to numeric
  p_e <- rep(100/length(p), times = length(p))  # Expected percentage if the pattern were uniform
  chi <- sum(((p - p_e)^2)/p_e)  # Chi-square statistic: (observed - expected)^2 / expected
  # This transformation provides numbers that are in a range that is easy to work with
  -log(-pchisq(chi, df = length(p) - 1, log.p = TRUE))
}
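# Quick illustration of the NUI on two made-up patterns (hypothetical values,
# not data-hub patterns). Chisquare_p() discards the first 5 elements as header
# information, so pad with 5 dummy entries.
dummy_header <- rep(NA, 5)
near_uniform <- c(dummy_header, c(9, 11, rep(10, 8)))  # gentle departure from uniform
peaked <- c(dummy_header, c(rep(1, 9), 91))            # almost all rain in one increment
Chisquare_p(near_uniform)  # small NUI
Chisquare_p(peaked)        # much larger NUI: the sort of pattern worth checking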
# Function to read in patterns, calculate the NUI and return a data frame with:
# row_n, eventID, duration_min, duration_hr, interval_min, region, NUI
# fname is the file name of the increments file
Find_bad_patterns <- function(fname) {
  # Read in all the patterns, one pattern per line
  pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)
  # Split the strings at commas
  pat <- str_split(pat, ',')
  # Calculate the chi-square-based NUI for each pattern
  chisq_stat <- unlist(map(.x = pat, .f = Chisquare_p))
  # Function to grab the first 5 fields from a temporal pattern
  # The first 5 entries are:
  # 1. EventID
  # 2. Duration (minutes)
  # 3. Interval length (min)
  # 4. Region
  # 5. Area
  First_five <- function(p) {
    p[1:5]
  }
  names(pat) <- 1:length(pat)  # pat needs to have names for map_df to work
  pat_db <- map_df(.x = pat, .f = First_five)  # get first 5 fields
  pat_db <- as_data_frame(t(as.matrix(pat_db)))  # need to transpose to convert to data frame
  names(pat_db) <- c('eventID', 'duration_min', 'interval_min', 'region', 'area')
  # Add the non-uniformity index value NUI
  pat_db <- pat_db %>%
    mutate(NUI = chisq_stat) %>%
    mutate(duration_min = as.integer(duration_min)) %>%
    mutate(row_n = 1:length(pat)) %>%
    mutate(duration_hr = as.integer(duration_min/60)) %>%
    select(row_n, eventID, duration_min, duration_hr, interval_min, region, NUI)
  # Return the data for each region, worst patterns first
  pat_db %>%
    arrange(desc(NUI))
}
# Process all increment files to create a summary for all patterns
all_patterns_sum <- map_df(.x = increment_files, .f = Find_bad_patterns )
all_patterns_sum <- all_patterns_sum %>% arrange(desc(NUI))
# Attach storm data to the pattern summary
all_patterns_sum <- all_patterns_sum %>%
  left_join(all_regions_stats, by = c("region" = "region", "eventID" = "eventID")) %>%
  arrange(desc(NUI))
# all_patterns_sum %>% View()
all_patterns_sum %>% print(n = 21)
# Map locations ----------------------------------------------------------
# Plot a graph of locations
#
# First n locations
my_rows <- 1:16
storm_map_df <- all_patterns_sum %>%
  slice(my_rows) %>%
  mutate_at(.vars = c('lat', 'lon'), .funs = as.numeric) %>%
  add_column(num = my_rows)
# You need a Google API key for this to work; see ?register_google
# Calculating the scale
# Source function that calculates distance (UTM) from degrees
source("https://gist.githubusercontent.com/TonyLadson/f37aab3e2ef517188a7f27166307c985/raw/0822970769bc90fcc28052a91b375399d665286e/UTM2deg.R")
# These eastings and northings are from the Jerangle area where the least uniform
# patterns occur
UTM2deg(easting = 712748.88, northing = 6021016.41, zone = 55, format_out = 'deg' )
# lat lon
# 1 -35.93206 149.3583
# If we move 20 km east, how many degrees?
UTM2deg(easting = 712748.88 + 20000, northing = 6021016.41, zone = 55, format_out = 'deg' )
# lat lon
# 1 -35.92751 149.5798
# Difference in lon
149.5798 - 149.3583 # 0.22
# Difference in lat
-35.93206 - -35.92751 #-0.00455
# So 20 km is about 0.22 deg, we can use this to create an approximate scale
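# Cross-check with a simple spherical approximation: one degree of longitude
# spans about 111.32 km * cos(latitude)
20 / (111.32 * cos(-35.93 * pi / 180))  # ~0.22 deg, consistent with the UTM result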
# Get segment of map of Australia for area south of Canberra
aust_map <- ggmap(get_map(location = c(lon = 149.3375, lat = -35.8875), zoom = 10), extent = 'device')
aust_map
# Specification of label locations
my_nudge_x <- c(0, 0.02, 0, -0.01, 0.01, -0.01, 0.01, -0.01, 0, 0.015, 0, 0.015, 0, 0.02, 0, -0.02)
my_nudge_y <- c(0.02, 0, -0.02, 0, 0.01, 0, 0, 0, 0.01, 0, -0.01, 0, 0.01, 0, -0.01, -0.01)
# Add points
# One point is in Queensland which won't be shown
p <- aust_map +
  geom_point(aes(x = lon, y = lat), colour = 'red', size = 0.4, data = storm_map_df) +
  geom_text(aes(x = lon, y = lat, label = num), data = storm_map_df, size = 1.9, check_overlap = FALSE,
            nudge_x = my_nudge_x, nudge_y = my_nudge_y) +
  theme_minimal(base_size = 5) +
  labs(x = NULL, y = NULL) +
  geom_segment(x = 149.0, xend = 149.0, y = -35.8, yend = -35.6, colour = 'black',  # North arrow
               arrow = arrow(length = unit(0.02, "npc"), type = 'closed')) +
  geom_segment(x = 149, xend = 149.22, y = -36.1, yend = -36.1, colour = 'black',  # Scale bar (0.22 deg = 20 km)
               arrow = arrow(angle = 90, ends = 'both', length = unit(0.02, "npc"))) +
  annotate(geom = 'text', x = 149.11, y = -36.08, label = '20 km', size = 1.5)
p
# Save for webpage
ggsave(file.path('../figures', 'storm_loc_map.png'), p, width = 4, height = 4)
# Process patterns for graphing -------------------------------------------
# Function to create a list of patterns
# Patterns need to be stored in a list rather than a data frame because the number of increments varies
List_patterns <- function(fname) {
  pat <- read_lines(file = file.path('../data', fname), skip = 1, n_max = -1L)
  # Split the strings at commas
  pat <- str_split(pat, ',')
  # Get the region names and the event IDs so that we can name list
  # elements and then pull individual patterns out.
  # The region name is the 4th element and the event ID is the 1st
  patIDs <- map_chr(.x = 1:length(pat), .f = function(i) str_c(pat[[i]][4], pat[[i]][1], sep = '_'))
  patIDs <- str_to_lower(patIDs)  # convert to lower case
  patIDs <- str_replace_all(patIDs, ' ', '_')  # convert spaces to underscores (snake case)
  names(pat) <- patIDs
  return(pat)
}
# Apply to all increments files
all_increments <- map(.x = increment_files, .f = List_patterns)
# This produces a list of 12 lists
# Use purrr::flatten to make into a single list
all_increments <- purrr::flatten(all_increments)
#head(all_increments)
#tail(all_increments)
# What are the region names?
all_names <- names(all_increments)
unique(str_replace(all_names, '[0-9]+', ''))
# [1] "central_slopes_" "east_coast_(north)_" "east_coast_(south)_" "s-sw_flatlands_(east)_" "s-sw_flatlands_(west)_"
# [6] "murray_basin_" "monsoonal_north_" "rangelands_" "rangelands_(west)_" "southern_slopes_(mainland)_"
# [11] "southern_slopes_(tasmania)_" "wet_tropics_"
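# With names in place, an individual pattern can be pulled straight from the
# list by its region_eventID name, e.g. one of the patterns graphed below:
str(all_increments[['murray_basin_5901']])
# Count the number of patterns per region
table(str_replace(all_names, '_[0-9]+$', ''))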
# Function to graph a pattern given the region name and event id e.g. central_slopes_3565
Graph_pattern <- function(pat_name_id) {
  pat_data <- all_increments[[pat_name_id]]
  my_eventID <- pat_data[1]
  my_duration <- pat_data[2]  # duration (min)
  my_interval <- pat_data[3]  # interval length (min)
  my_region <- pat_data[4]
  my_area <- pat_data[5]
  my_breaks <- seq(0, 60, by = 5)  # The maximum number of increments is 56
                                   # so setting this to 60 will always work
  my_title <- sprintf('Areal Temporal Pattern %s', my_eventID)
  my_subtitle <- sprintf('Region = %s, Area = %s km-squared, \nDuration = %s min (%.0f hour), %i increments of %s min',
                         my_region, my_area, my_duration, as.numeric(my_duration)/60,
                         as.integer(as.numeric(my_duration)/as.numeric(my_interval)), my_interval)
  # Remove header information, just leaving the percentages
  pat_data <- as.numeric(pat_data[-1:-5])
  # Create data frame for plotting
  pat_data_df <- data_frame(increments = 1:length(pat_data), percentages = pat_data)
  p <- pat_data_df %>%
    ggplot(aes(x = increments, y = percentages)) +
    geom_col(fill = 'blue', colour = 'black') +
    scale_y_continuous(name = 'Percentage in each increment') +
    scale_x_continuous(breaks = my_breaks) +
    labs(title = my_title,
         subtitle = my_subtitle) +
    theme_grey(base_size = 5)
  # Five minor breaks per major break
  # https://stackoverflow.com/questions/39691591/ggplot2-integer-multiple-of-minor-breaks-per-major-break
  majors <- ggplot_build(p)$layout$panel_params[[1]]$x.major_source
  multiplier <- 5
  minors <- seq(from = min(majors),
                to = max(majors),
                length.out = ((length(majors) - 1) * multiplier) + 1)
  p <- p + scale_x_continuous(name = 'Increments', breaks = my_breaks, minor_breaks = minors)
  suppressMessages(print(p))
}
# Create graphs for figures 4 and 5 in the blog
p1 <- Graph_pattern('murray_basin_5901')
p2 <- Graph_pattern('murray_basin_5644')
p3 <- Graph_pattern('murray_basin_5731')
p4 <- Graph_pattern('murray_basin_5898')
p <- p1 + p2 + p3 + p4  # combine with patchwork
p
ggsave(file.path('../figures', 'next4_patterns.png'), p, width = 4, height = 3)
p5 <- Graph_pattern('murray_basin_5810')
p6 <- Graph_pattern('central_slopes_4170')
p7 <- Graph_pattern('murray_basin_5822')
p8 <- Graph_pattern('monsoonal_north_5004')
p <- p5 + p6 + p7 + p8
p
ggsave(file.path('../figures', 'next5_8_patterns.png'), p, width = 4, height = 3)