TonyLadson/Get_tidy_preburst.R

## Get_tidy_preburst.R
Get_tidy_prebust <- function(lat, lon){

  require(RCurl)
  require(stringr)
  require(tidyverse)
  require(glue)

  # Functions_____________________________________________________________________________

  # Function to get the text file from the data hub for a latitude and longitude

  Get_prebust_data <- function(lat, lon){

    require(RCurl)
    require(glue)

    # Checking Latitude and Longitude
    # From https://gist.github.com/graydon/11198540 the bounding box for Australia
    # 'AU': ('Australia', (113.338953078, -43.6345972634, 153.569469029, -10.6681857235)),

    if(lat > -10.6681857235 | lat < -43.6345972634) stop("Points must be within Australia")
    if(lon < 113.338953078 | lon > 153.569469029) stop("Points must be within Australia")

    RCurl::getURI(glue("https://data.arr-software.org/?lon_coord={lon}&lat_coord={lat}&type=text&Preburst=1&OtherPreburst=1"))

  }

  # Function to extract the preburst data from the hub_data text file

  Extract_preburst <- function(percentile = c(50, 10, 25, 75, 90), type = c("depth", "ratio"), hub_data ){

    require(tidyverse)
    require(stringr)


    # Check inputs--------------------

    if(identical(percentile, c(50, 10, 25, 75, 90))) percentile = 50 # take the first value as the default
    if(!percentile %in% c(50, 10, 25, 75, 90) ) stop("percentile must be one of 10, 25, 50 75, 90")


    if(identical(type, c("depth", "ratio"))) type = "depth" # take the first value as the default
    if(!type %in% c("depth", "ratio")) stop("type must be either 'depth' or 'ratio'")


    # Functions-------------------------

    .Get_start_end <- function(percentile){
      # Given a percentile, get the start and end text to search in the data hub file

      if(!percentile %in% c(10, 25, 50, 75, 90)) stop('Function Get_start_end expects percentile be one of 10,25, 50, 75, 90')

      search_text <- case_when(percentile == 10 ~ c("PREBURST10","PREBURST10_META") ,
                               percentile == 25 ~ c("PREBURST25","PREBURST25_META"),
                               percentile == 50 ~ c("PREBURST","PREBURST_META"),
                               percentile == 75 ~ c("PREBURST75","PREBURST75_META"),
                               percentile == 90 ~ c("PREBURST90","PREBURST90_META"))

      return(search_text)
    }


    # Make search text
    #

    search_text <- .Get_start_end(percentile)

    search_pattern = str_c("(?<=\\[", search_text[1],"\\])(.*)(?=\\[", search_text[2],"\\])") # Construct pattern for regex

    pb_info <- str_extract(hub_data, regex(search_pattern, dotall = TRUE)) # extract the required pre-burst data from hub_data

    if(type == 'depth'){
      x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
      x3 <- str_replace_all(x2, '\\([^)]*\\)', '')  # This removes brackets and everything between brackets
      x3 <- str_split(x3, pattern = ",") # split at commas
      x4 <- unlist(x3) # convert to vector
      x5 <- x4[c(-1,-length(x4))] # remove first and last value
      x5 <- x5[c(-1:-7)] # remove first seven values
      x5 <- as.numeric(x5)
      x6 <- matrix(x5, ncol = 7, byrow = TRUE)
      colnames(x6) <- c('dur_min', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
      pb_depth <- as_tibble(x6, .name_repair = 'check_unique')
      pb_depth <- add_column(pb_depth, dur_hour = pb_depth$dur_min/60, .before = 2)
      pb_depth <- add_column(pb_depth, type = type)
      pb_depth <- add_column(pb_depth, percentile = percentile)
      return(pb_depth)
    }

    if(type == 'ratio'){
      x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
      x3 <- str_extract_all(x2, '\\([^)]*\\)')  # keeps brackets and everything in the brackets
      x3 <- unlist(x3)
      x4 <- str_replace_all(x3, '[()]', '') # remove the brackets
      x5 <- x4[!x4 %in% c("h", "%")] # get rid of other things we don't need
      x5 <- as.numeric(x5)
      x6 <- matrix(x5, ncol = 7, byrow = TRUE) # matrix of rainfall ratios
      colnames(x6) <- c('dur_hour', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
      pb_ratio <- as_tibble(x6, .name_repair = 'check_unique') # confert to data frame
      pb_ratio <- add_column(pb_ratio, dur_min = pb_ratio$dur_hour*60, .before = 1) # add a column of minutes for consistency
      pb_ratio <- add_column(pb_ratio, type = type)
      pb_ratio <- add_column(pb_ratio, percentile = percentile)
      return(pb_ratio)
    }

    stop(str_c("Error in function Get_preburst.  Expecting type = 'depth', or 'ratio'. Type = ", type ))

  }

  # Main routine______________________________________________________________________________


  hub_data <- Get_prebust_data(lat, lon)


  # Assemble a data frame

  # build a data frame that contains all the percentiles and types of interest

  request_data <- expand.grid(percentile = c(10, 25, 50, 75, 90), type = c('depth', 'ratio'))

  # Loop through all the percentiles and ratio and assemble a tidy data frame of the results

  tidy_preburst <- bind_rows(pmap(.l = request_data, .f = Extract_preburst, hub_data = hub_data))

  tidy_preburst %>%
    gather(-dur_min, -dur_hour, -type, -percentile, key = 'AEP', value = 'preburst') %>%
    mutate(AEP = as.integer(str_remove(AEP, "AEP_")))  # reformat the AEP values to be integers


}


# Function to get pre-burst data for a given latitude and longitude
# returns a text file from the data hub

Axe = Get_tidy_prebust(lat =-36.9, lon = 144.36)


# Make facet labels
AEP.labs <- str_c('AEP', c(1,2,5,10,20,50),'%')
names(AEP.labs) <- c(1,2,5,10,20,50)

p <- Axe %>%
  filter(type == 'ratio') %>%
  ggplot(aes(x = dur_hour, y = preburst, colour = factor(percentile))) +
  geom_line(size = 0.25) +
  geom_point(size = 0.5)+
  facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) +
  labs(y = 'Preburst ratio',
       x = 'Duration (hour)') +
  scale_colour_discrete(name = 'Percentile') +
  theme_gray(base_size = 5)

p

ggsave(file.path('figures', 'Axe_dur_ratio.png'), p, width = 4, height = 3)


p1 <- Axe %>%
  filter(type == 'ratio') %>%
  ggplot(aes(x = percentile, y = preburst, colour = factor(dur_hour))) +
  geom_line(size = 0.25) +
  geom_point(size = 0.5)+
  facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) +
  labs(y = 'Preburst ratio',
       x = 'Percentile') +
  scale_colour_discrete(name = 'Duration (hour)') +
  theme_gray(base_size = 5)
p1

ggsave(file.path('figures', 'Axe_percentile_ratio.png'), p1, width = 4, height = 3)
	Get_tidy_prebust <- function(lat, lon){

	require(RCurl)
	require(stringr)
	require(tidyverse)
	require(glue)

	# Functions_____________________________________________________________________________

	# Function to get the text file from the data hub for a latitude and longitude

	Get_prebust_data <- function(lat, lon){

	require(RCurl)
	require(glue)

	# Checking Latitude and Longitude
	# From https://gist.github.com/graydon/11198540 the bounding box for Australia
	# 'AU': ('Australia', (113.338953078, -43.6345972634, 153.569469029, -10.6681857235)),

	if(lat > -10.6681857235 \| lat < -43.6345972634) stop("Points must be within Australia")
	if(lon < 113.338953078 \| lon > 153.569469029) stop("Points must be within Australia")

	RCurl::getURI(glue("https://data.arr-software.org/?lon_coord={lon}&lat_coord={lat}&type=text&Preburst=1&OtherPreburst=1"))

	}

	# Function to extract the preburst data from the hub_data text file

	Extract_preburst <- function(percentile = c(50, 10, 25, 75, 90), type = c("depth", "ratio"), hub_data ){

	require(tidyverse)
	require(stringr)



	# Check inputs--------------------

	if(identical(percentile, c(50, 10, 25, 75, 90))) percentile = 50 # take the first value as the default
	if(!percentile %in% c(50, 10, 25, 75, 90) ) stop("percentile must be one of 10, 25, 50 75, 90")


	if(identical(type, c("depth", "ratio"))) type = "depth" # take the first value as the default
	if(!type %in% c("depth", "ratio")) stop("type must be either 'depth' or 'ratio'")


	# Functions-------------------------

	.Get_start_end <- function(percentile){
	# Given a percentile, get the start and end text to search in the data hub file

	if(!percentile %in% c(10, 25, 50, 75, 90)) stop('Function Get_start_end expects percentile be one of 10,25, 50, 75, 90')

	search_text <- case_when(percentile == 10 ~ c("PREBURST10","PREBURST10_META") ,
	percentile == 25 ~ c("PREBURST25","PREBURST25_META"),
	percentile == 50 ~ c("PREBURST","PREBURST_META"),
	percentile == 75 ~ c("PREBURST75","PREBURST75_META"),
	percentile == 90 ~ c("PREBURST90","PREBURST90_META"))

	return(search_text)
	}


	# Make search text
	#

	search_text <- .Get_start_end(percentile)

	search_pattern = str_c("(?<=\\[", search_text[1],"\\])(.*)(?=\\[", search_text[2],"\\])") # Construct pattern for regex

	pb_info <- str_extract(hub_data, regex(search_pattern, dotall = TRUE)) # extract the required pre-burst data from hub_data

	if(type == 'depth'){
	x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
	x3 <- str_replace_all(x2, '\\([^)]*\\)', '') # This removes brackets and everything between brackets
	x3 <- str_split(x3, pattern = ",") # split at commas
	x4 <- unlist(x3) # convert to vector
	x5 <- x4[c(-1,-length(x4))] # remove first and last value
	x5 <- x5[c(-1:-7)] # remove first seven values
	x5 <- as.numeric(x5)
	x6 <- matrix(x5, ncol = 7, byrow = TRUE)
	colnames(x6) <- c('dur_min', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
	pb_depth <- as_tibble(x6, .name_repair = 'check_unique')
	pb_depth <- add_column(pb_depth, dur_hour = pb_depth$dur_min/60, .before = 2)
	pb_depth <- add_column(pb_depth, type = type)
	pb_depth <- add_column(pb_depth, percentile = percentile)
	return(pb_depth)
	}

	if(type == 'ratio'){
	x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
	x3 <- str_extract_all(x2, '\\([^)]*\\)') # keeps brackets and everything in the brackets
	x3 <- unlist(x3)
	x4 <- str_replace_all(x3, '[()]', '') # remove the brackets
	x5 <- x4[!x4 %in% c("h", "%")] # get rid of other things we don't need
	x5 <- as.numeric(x5)
	x6 <- matrix(x5, ncol = 7, byrow = TRUE) # matrix of rainfall ratios
	colnames(x6) <- c('dur_hour', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
	pb_ratio <- as_tibble(x6, .name_repair = 'check_unique') # confert to data frame
	pb_ratio <- add_column(pb_ratio, dur_min = pb_ratio$dur_hour*60, .before = 1) # add a column of minutes for consistency
	pb_ratio <- add_column(pb_ratio, type = type)
	pb_ratio <- add_column(pb_ratio, percentile = percentile)
	return(pb_ratio)
	}

	stop(str_c("Error in function Get_preburst. Expecting type = 'depth', or 'ratio'. Type = ", type ))

	}

	# Main routine______________________________________________________________________________


	hub_data <- Get_prebust_data(lat, lon)


	# Assemble a data frame

	# build a data frame that contains all the percentiles and types of interest

	request_data <- expand.grid(percentile = c(10, 25, 50, 75, 90), type = c('depth', 'ratio'))

	# Loop through all the percentiles and ratio and assemble a tidy data frame of the results

	tidy_preburst <- bind_rows(pmap(.l = request_data, .f = Extract_preburst, hub_data = hub_data))

	tidy_preburst %>%
	gather(-dur_min, -dur_hour, -type, -percentile, key = 'AEP', value = 'preburst') %>%
	mutate(AEP = as.integer(str_remove(AEP, "AEP_"))) # reformat the AEP values to be integers



	}


	# Function to get pre-burst data for a given latitude and longitude
	# returns a text file from the data hub

	Axe = Get_tidy_prebust(lat =-36.9, lon = 144.36)


	# Make facet labels
	AEP.labs <- str_c('AEP', c(1,2,5,10,20,50),'%')
	names(AEP.labs) <- c(1,2,5,10,20,50)

	p <- Axe %>%
	filter(type == 'ratio') %>%
	ggplot(aes(x = dur_hour, y = preburst, colour = factor(percentile))) +
	geom_line(size = 0.25) +
	geom_point(size = 0.5)+
	facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) +
	labs(y = 'Preburst ratio',
	x = 'Duration (hour)') +
	scale_colour_discrete(name = 'Percentile') +
	theme_gray(base_size = 5)

	p

	ggsave(file.path('figures', 'Axe_dur_ratio.png'), p, width = 4, height = 3)



	p1 <- Axe %>%
	filter(type == 'ratio') %>%
	ggplot(aes(x = percentile, y = preburst, colour = factor(dur_hour))) +
	geom_line(size = 0.25) +
	geom_point(size = 0.5)+
	facet_wrap(~AEP, labeller = labeller(AEP = AEP.labs)) +
	labs(y = 'Preburst ratio',
	x = 'Percentile') +
	scale_colour_discrete(name = 'Duration (hour)') +
	theme_gray(base_size = 5)
	p1

	ggsave(file.path('figures', 'Axe_percentile_ratio.png'), p1, width = 4, height = 3)