Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active March 24, 2020 01:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/d361aff7714da1c0b32fb252fadd0da5 to your computer and use it in GitHub Desktop.
Save TonyLadson/d361aff7714da1c0b32fb252fadd0da5 to your computer and use it in GitHub Desktop.
Function to get preburst data from the data hub and convert it into a tidy format. See https://tonyladson.wordpress.com/2020/03/23/tidy-pre-burst-data/
Get_tidy_prebust <- function(lat, lon){
require(RCurl)
require(stringr)
require(tidyverse)
require(glue)
# Functions_____________________________________________________________________________
# Function to get the text file from the data hub for a latitude and longitude
Get_prebust_data <- function(lat, lon){
require(RCurl)
require(glue)
# Checking Latitude and Longitude
# From https://gist.github.com/graydon/11198540 the bounding box for Australia
# 'AU': ('Australia', (113.338953078, -43.6345972634, 153.569469029, -10.6681857235)),
if(lat > -10.6681857235 | lat < -43.6345972634) stop("Points must be within Australia")
if(lon < 113.338953078 | lon > 153.569469029) stop("Points must be within Australia")
RCurl::getURI(glue("https://data.arr-software.org/?lon_coord={lon}&lat_coord={lat}&type=text&Preburst=1&OtherPreburst=1"))
}
# Function to extract the preburst data from the hub_data text file
Extract_preburst <- function(percentile = c(50, 10, 25, 75, 90), type = c("depth", "ratio"), hub_data ){
require(tidyverse)
require(stringr)
# Check inputs--------------------
if(identical(percentile, c(50, 10, 25, 75, 90))) percentile = 50 # take the first value as the default
if(!percentile %in% c(50, 10, 25, 75, 90) ) stop("percentile must be one of 10, 25, 50 75, 90")
if(identical(type, c("depth", "ratio"))) type = "depth" # take the first value as the default
if(!type %in% c("depth", "ratio")) stop("type must be either 'depth' or 'ratio'")
# Functions-------------------------
.Get_start_end <- function(percentile){
# Given a percentile, get the start and end text to search in the data hub file
if(!percentile %in% c(10, 25, 50, 75, 90)) stop('Function Get_start_end expects percentile be one of 10,25, 50, 75, 90')
search_text <- case_when(percentile == 10 ~ c("PREBURST10","PREBURST10_META") ,
percentile == 25 ~ c("PREBURST25","PREBURST25_META"),
percentile == 50 ~ c("PREBURST","PREBURST_META"),
percentile == 75 ~ c("PREBURST75","PREBURST75_META"),
percentile == 90 ~ c("PREBURST90","PREBURST90_META"))
return(search_text)
}
# Make search text
#
search_text <- .Get_start_end(percentile)
search_pattern = str_c("(?<=\\[", search_text[1],"\\])(.*)(?=\\[", search_text[2],"\\])") # Construct pattern for regex
pb_info <- str_extract(hub_data, regex(search_pattern, dotall = TRUE)) # extract the required pre-burst data from hub_data
if(type == 'depth'){
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
x3 <- str_replace_all(x2, '\\([^)]*\\)', '') # This removes brackets and everything between brackets
x3 <- str_split(x3, pattern = ",") # split at commas
x4 <- unlist(x3) # convert to vector
x5 <- x4[c(-1,-length(x4))] # remove first and last value
x5 <- x5[c(-1:-7)] # remove first seven values
x5 <- as.numeric(x5)
x6 <- matrix(x5, ncol = 7, byrow = TRUE)
colnames(x6) <- c('dur_min', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
pb_depth <- as_tibble(x6, .name_repair = 'check_unique')
pb_depth <- add_column(pb_depth, dur_hour = pb_depth$dur_min/60, .before = 2)
pb_depth <- add_column(pb_depth, type = type)
pb_depth <- add_column(pb_depth, percentile = percentile)
return(pb_depth)
}
if(type == 'ratio'){
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
x3 <- str_extract_all(x2, '\\([^)]*\\)') # keeps brackets and everything in the brackets
x3 <- unlist(x3)
x4 <- str_replace_all(x3, '[()]', '') # remove the brackets
x5 <- x4[!x4 %in% c("h", "%")] # get rid of other things we don't need
x5 <- as.numeric(x5)
x6 <- matrix(x5, ncol = 7, byrow = TRUE) # matrix of rainfall ratios
colnames(x6) <- c('dur_hour', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
pb_ratio <- as_tibble(x6, .name_repair = 'check_unique') # confert to data frame
pb_ratio <- add_column(pb_ratio, dur_min = pb_ratio$dur_hour*60, .before = 1) # add a column of minutes for consistency
pb_ratio <- add_column(pb_ratio, type = type)
pb_ratio <- add_column(pb_ratio, percentile = percentile)
return(pb_ratio)
}
stop(str_c("Error in function Get_preburst. Expecting type = 'depth', or 'ratio'. Type = ", type ))
}
# Main routine______________________________________________________________________________
hub_data <- Get_prebust_data(lat, lon)
# Assemble a data frame
# build a data frame that contains all the percentiles and types of interest
request_data <- expand.grid(percentile = c(10, 25, 50, 75, 90), type = c('depth', 'ratio'))
# Loop through all the percentiles and ratio and assemble a tidy data frame of the results
tidy_preburst <- bind_rows(pmap(.l = request_data, .f = Extract_preburst, hub_data = hub_data))
tidy_preburst %>%
gather(-dur_min, -dur_hour, -type, -percentile, key = 'AEP', value = 'preburst') %>%
mutate(AEP = as.integer(str_remove(AEP, "AEP_"))) # reformat the AEP values to be integers
}
# Function to get pre-burst data for a given latitude and longitude
# returns a text file from the data hub
Axe = Get_tidy_prebust(lat =-36.9, lon = 144.36)
# Make facet labels
AEP.labs <- str_c('AEP', c(1,2,5,10,20,50),'%')
names(AEP.labs) <- c(1,2,5,10,20,50)
p <- Axe %>%
filter(type == 'ratio') %>%
ggplot(aes(x = dur_hour, y = preburst, colour = factor(percentile))) +
geom_line(size = 0.25) +
geom_point(size = 0.5)+
facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) +
labs(y = 'Preburst ratio',
x = 'Duration (hour)') +
scale_colour_discrete(name = 'Percentile') +
theme_gray(base_size = 5)
p
ggsave(file.path('figures', 'Axe_dur_ratio.png'), p, width = 4, height = 3)
p1 <- Axe %>%
filter(type == 'ratio') %>%
ggplot(aes(x = percentile, y = preburst, colour = factor(dur_hour))) +
geom_line(size = 0.25) +
geom_point(size = 0.5)+
facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) +
labs(y = 'Preburst ratio',
x = 'Percentile') +
scale_colour_discrete(name = 'Duration (hour)') +
theme_gray(base_size = 5)
p1
ggsave(file.path('figures', 'Axe_percentile_ratio.png'), p1, width = 4, height = 3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment