Last active
March 24, 2020 01:00
-
-
Save TonyLadson/d361aff7714da1c0b32fb252fadd0da5 to your computer and use it in GitHub Desktop.
Function to get preburst data from the data hub and convert it into a tidy format. See https://tonyladson.wordpress.com/2020/03/23/tidy-pre-burst-data/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Get_tidy_prebust <- function(lat, lon){ | |
require(RCurl) | |
require(stringr) | |
require(tidyverse) | |
require(glue) | |
# Functions_____________________________________________________________________________ | |
# Function to get the text file from the data hub for a latitude and longitude | |
Get_prebust_data <- function(lat, lon){ | |
require(RCurl) | |
require(glue) | |
# Checking Latitude and Longitude | |
# From https://gist.github.com/graydon/11198540 the bounding box for Australia | |
# 'AU': ('Australia', (113.338953078, -43.6345972634, 153.569469029, -10.6681857235)), | |
if(lat > -10.6681857235 | lat < -43.6345972634) stop("Points must be within Australia") | |
if(lon < 113.338953078 | lon > 153.569469029) stop("Points must be within Australia") | |
RCurl::getURI(glue("https://data.arr-software.org/?lon_coord={lon}&lat_coord={lat}&type=text&Preburst=1&OtherPreburst=1")) | |
} | |
# Function to extract the preburst data from the hub_data text file | |
Extract_preburst <- function(percentile = c(50, 10, 25, 75, 90), type = c("depth", "ratio"), hub_data ){ | |
require(tidyverse) | |
require(stringr) | |
# Check inputs-------------------- | |
if(identical(percentile, c(50, 10, 25, 75, 90))) percentile = 50 # take the first value as the default | |
if(!percentile %in% c(50, 10, 25, 75, 90) ) stop("percentile must be one of 10, 25, 50 75, 90") | |
if(identical(type, c("depth", "ratio"))) type = "depth" # take the first value as the default | |
if(!type %in% c("depth", "ratio")) stop("type must be either 'depth' or 'ratio'") | |
# Functions------------------------- | |
.Get_start_end <- function(percentile){ | |
# Given a percentile, get the start and end text to search in the data hub file | |
if(!percentile %in% c(10, 25, 50, 75, 90)) stop('Function Get_start_end expects percentile be one of 10,25, 50, 75, 90') | |
search_text <- case_when(percentile == 10 ~ c("PREBURST10","PREBURST10_META") , | |
percentile == 25 ~ c("PREBURST25","PREBURST25_META"), | |
percentile == 50 ~ c("PREBURST","PREBURST_META"), | |
percentile == 75 ~ c("PREBURST75","PREBURST75_META"), | |
percentile == 90 ~ c("PREBURST90","PREBURST90_META")) | |
return(search_text) | |
} | |
# Make search text | |
# | |
search_text <- .Get_start_end(percentile) | |
search_pattern = str_c("(?<=\\[", search_text[1],"\\])(.*)(?=\\[", search_text[2],"\\])") # Construct pattern for regex | |
pb_info <- str_extract(hub_data, regex(search_pattern, dotall = TRUE)) # extract the required pre-burst data from hub_data | |
if(type == 'depth'){ | |
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas | |
x3 <- str_replace_all(x2, '\\([^)]*\\)', '') # This removes brackets and everything between brackets | |
x3 <- str_split(x3, pattern = ",") # split at commas | |
x4 <- unlist(x3) # convert to vector | |
x5 <- x4[c(-1,-length(x4))] # remove first and last value | |
x5 <- x5[c(-1:-7)] # remove first seven values | |
x5 <- as.numeric(x5) | |
x6 <- matrix(x5, ncol = 7, byrow = TRUE) | |
colnames(x6) <- c('dur_min', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' ) | |
pb_depth <- as_tibble(x6, .name_repair = 'check_unique') | |
pb_depth <- add_column(pb_depth, dur_hour = pb_depth$dur_min/60, .before = 2) | |
pb_depth <- add_column(pb_depth, type = type) | |
pb_depth <- add_column(pb_depth, percentile = percentile) | |
return(pb_depth) | |
} | |
if(type == 'ratio'){ | |
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas | |
x3 <- str_extract_all(x2, '\\([^)]*\\)') # keeps brackets and everything in the brackets | |
x3 <- unlist(x3) | |
x4 <- str_replace_all(x3, '[()]', '') # remove the brackets | |
x5 <- x4[!x4 %in% c("h", "%")] # get rid of other things we don't need | |
x5 <- as.numeric(x5) | |
x6 <- matrix(x5, ncol = 7, byrow = TRUE) # matrix of rainfall ratios | |
colnames(x6) <- c('dur_hour', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' ) | |
pb_ratio <- as_tibble(x6, .name_repair = 'check_unique') # confert to data frame | |
pb_ratio <- add_column(pb_ratio, dur_min = pb_ratio$dur_hour*60, .before = 1) # add a column of minutes for consistency | |
pb_ratio <- add_column(pb_ratio, type = type) | |
pb_ratio <- add_column(pb_ratio, percentile = percentile) | |
return(pb_ratio) | |
} | |
stop(str_c("Error in function Get_preburst. Expecting type = 'depth', or 'ratio'. Type = ", type )) | |
} | |
# Main routine______________________________________________________________________________ | |
hub_data <- Get_prebust_data(lat, lon) | |
# Assemble a data frame | |
# build a data frame that contains all the percentiles and types of interest | |
request_data <- expand.grid(percentile = c(10, 25, 50, 75, 90), type = c('depth', 'ratio')) | |
# Loop through all the percentiles and ratio and assemble a tidy data frame of the results | |
tidy_preburst <- bind_rows(pmap(.l = request_data, .f = Extract_preburst, hub_data = hub_data)) | |
tidy_preburst %>% | |
gather(-dur_min, -dur_hour, -type, -percentile, key = 'AEP', value = 'preburst') %>% | |
mutate(AEP = as.integer(str_remove(AEP, "AEP_"))) # reformat the AEP values to be integers | |
} | |
# Function to get pre-burst data for a given latitude and longitude | |
# returns a text file from the data hub | |
Axe = Get_tidy_prebust(lat =-36.9, lon = 144.36) | |
# Make facet labels | |
AEP.labs <- str_c('AEP', c(1,2,5,10,20,50),'%') | |
names(AEP.labs) <- c(1,2,5,10,20,50) | |
p <- Axe %>% | |
filter(type == 'ratio') %>% | |
ggplot(aes(x = dur_hour, y = preburst, colour = factor(percentile))) + | |
geom_line(size = 0.25) + | |
geom_point(size = 0.5)+ | |
facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) + | |
labs(y = 'Preburst ratio', | |
x = 'Duration (hour)') + | |
scale_colour_discrete(name = 'Percentile') + | |
theme_gray(base_size = 5) | |
p | |
ggsave(file.path('figures', 'Axe_dur_ratio.png'), p, width = 4, height = 3) | |
p1 <- Axe %>% | |
filter(type == 'ratio') %>% | |
ggplot(aes(x = percentile, y = preburst, colour = factor(dur_hour))) + | |
geom_line(size = 0.25) + | |
geom_point(size = 0.5)+ | |
facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) + | |
labs(y = 'Preburst ratio', | |
x = 'Percentile') + | |
scale_colour_discrete(name = 'Duration (hour)') + | |
theme_gray(base_size = 5) | |
p1 | |
ggsave(file.path('figures', 'Axe_percentile_ratio.png'), p1, width = 4, height = 3) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment