Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active March 20, 2020 04:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/97f396b2b913fff38a278fd75d27b710 to your computer and use it in GitHub Desktop.
Save TonyLadson/97f396b2b913fff38a278fd75d27b710 to your computer and use it in GitHub Desktop.
Function to scrape the data hub and return information on pre-burst rainfall, see https://tonyladson.wordpress.com/2020/03/16/scraping-the-data-hub/
# lat and lon must be latitude and longitude of some point on Australia, latitude must be negative
Get_preburst <- function(percentile = c(50, 10, 25, 75, 90), type = c("depth", "ratio"), lat, lon){
require(tidyverse)
require(stringr)
require(glue)
require(RCurl)
# Check inputs--------------------
if(identical(percentile, c(50, 10, 25, 75, 90))) percentile = 50 # take the first value as the default
if(!percentile %in% c(50, 10, 25, 75, 90) ) stop("percentile must be one of 10, 25, 50 75, 90")
if(identical(type, c("depth", "ratio"))) type = "depth" # take the first value as the default
if(!type %in% c("depth", "ratio")) stop("type must be either 'depth' or 'ratio'")
# Checking Latitude and Longitude
# From https://gist.github.com/graydon/11198540 the bounding box for Australia
# 'AU': ('Australia', (113.338953078, -43.6345972634, 153.569469029, -10.6681857235)),
if(lat > -10.6681857235 | lat < -43.6345972634) stop("Points must be within Australia")
if(lon < 113.338953078 | lon > 153.569469029) stop("Points must be within Australia")
# Functions-------------------------
.Get_start_end <- function(percentile){
# Given a percentile, get the start and end text to search in the data hub file
if(!percentile %in% c(10, 25, 50, 75, 90)) stop('Function Get_start_end expects percentile be one of 10,25, 50, 75, 90')
search_text <- case_when(percentile == 10 ~ c("PREBURST10","PREBURST10_META") ,
percentile == 25 ~ c("PREBURST25","PREBURST25_META"),
percentile == 50 ~ c("PREBURST","PREBURST_META"),
percentile == 75 ~ c("PREBURST75","PREBURST75_META"),
percentile == 90 ~ c("PREBURST90","PREBURST90_META"))
return(search_text)
}
# Scrape the data hub-----------------------
# See details at https://data.arr-software.org/about
hub_data <- RCurl::getURI(glue("https://data.arr-software.org/?lon_coord={lon}&lat_coord={lat}&type=text&Preburst=1&OtherPreburst=1"))
# Get search text
search_text <- .Get_start_end(percentile)
search_pattern = str_c("(?<=\\[", search_text[1],"\\])(.*)(?=\\[", search_text[2],"\\])") # Construct pattern for regex
pb_info <- str_extract(hub_data, regex(search_pattern, dotall = TRUE)) # extract the required pre-burst data from hub_data
if(type == 'depth'){
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
x3 <- str_replace_all(x2, '\\([^)]*\\)', '') # This removes brackets and everything between brackets
x3 <- str_split(x3, pattern = ",") # split at commas
x4 <- unlist(x3) # convert to vector
x5 <- x4[c(-1,-length(x4))] # remove first and last value
x5 <- x5[c(-1:-7)] # remove first seven values
x5 <- as.numeric(x5)
x6 <- matrix(x5, ncol = 7, byrow = TRUE)
colnames(x6) <- c('dur_min', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
pb_depth <- as_tibble(x6, .name_repair = 'check_unique')
pb_depth <- add_column(pb_depth, dur_hour = pb_depth$dur_min/60, .before = 2)
return(pb_depth)
}
if(type == 'ratio'){
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas
x3 <- str_extract_all(x2, '\\([^)]*\\)') # keeps brackets and everything in the brackets
x3 <- unlist(x3)
x4 <- str_replace_all(x3, '[()]', '') # remove the brackets
x5 <- x4[!x4 %in% c("h", "%")] # get rid of other things we don't need
x5 <- as.numeric(x5)
x6 <- matrix(x5, ncol = 7, byrow = TRUE) # matrix of rainfall ratios
colnames(x6) <- c('dur_hour', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' )
pb_ratio <- as_tibble(x6, .name_repair = 'check_unique') # confert to data frame
pb_ratio <- add_column(pb_ratio, dur_min = pb_ratio$dur_hour*60, .before = 1) # add a column of minutes for consistency
return(pb_ratio)
}
stop(str_c("Error in function Get_preburst. Expecting type = 'depth', or 'ratio'. Type = ", type ))
}
# Example
# Get_preburst(percentile = 90, type = 'depth', lat = -36.9, lon = 144.36)
# # A tibble: 11 x 8
# dur_min dur_hour AEP_50 AEP_20 AEP_10 AEP_5 AEP_2 AEP_1
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 60 1 22.3 25.2 27.1 29 30.6 31.8
# 2 90 1.5 24.2 25.4 26.1 26.9 31.5 35
# 3 120 2 27.2 33.4 37.5 41.5 43.6 45.2
# 4 180 3 23.3 27.1 29.6 32 43.4 52
# 5 360 6 19.5 24.3 27.5 30.6 42.5 51.4
# 6 720 12 12.8 20.3 25.3 30.1 34.4 37.6
# 7 1080 18 15.6 18.1 19.7 21.3 25.7 29
# 8 1440 24 14.6 18.8 21.6 24.2 24.3 24.4
# 9 2160 36 9 11.2 12.6 14 22.3 28.5
# 10 2880 48 1.1 2.7 3.7 4.7 10.7 15.1
# 11 4320 72 0.2 1.9 3 4 12.5 18.9
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment