Last active
March 20, 2020 04:51
-
-
Save TonyLadson/97f396b2b913fff38a278fd75d27b710 to your computer and use it in GitHub Desktop.
Function to scrape the data hub and return information on pre-burst rainfall, see https://tonyladson.wordpress.com/2020/03/16/scraping-the-data-hub/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# lat and lon must be latitude and longitude of some point on Australia, latitude must be negative | |
Get_preburst <- function(percentile = c(50, 10, 25, 75, 90), type = c("depth", "ratio"), lat, lon){ | |
require(tidyverse) | |
require(stringr) | |
require(glue) | |
require(RCurl) | |
# Check inputs-------------------- | |
if(identical(percentile, c(50, 10, 25, 75, 90))) percentile = 50 # take the first value as the default | |
if(!percentile %in% c(50, 10, 25, 75, 90) ) stop("percentile must be one of 10, 25, 50 75, 90") | |
if(identical(type, c("depth", "ratio"))) type = "depth" # take the first value as the default | |
if(!type %in% c("depth", "ratio")) stop("type must be either 'depth' or 'ratio'") | |
# Checking Latitude and Longitude | |
# From https://gist.github.com/graydon/11198540 the bounding box for Australia | |
# 'AU': ('Australia', (113.338953078, -43.6345972634, 153.569469029, -10.6681857235)), | |
if(lat > -10.6681857235 | lat < -43.6345972634) stop("Points must be within Australia") | |
if(lon < 113.338953078 | lon > 153.569469029) stop("Points must be within Australia") | |
# Functions------------------------- | |
.Get_start_end <- function(percentile){ | |
# Given a percentile, get the start and end text to search in the data hub file | |
if(!percentile %in% c(10, 25, 50, 75, 90)) stop('Function Get_start_end expects percentile be one of 10,25, 50, 75, 90') | |
search_text <- case_when(percentile == 10 ~ c("PREBURST10","PREBURST10_META") , | |
percentile == 25 ~ c("PREBURST25","PREBURST25_META"), | |
percentile == 50 ~ c("PREBURST","PREBURST_META"), | |
percentile == 75 ~ c("PREBURST75","PREBURST75_META"), | |
percentile == 90 ~ c("PREBURST90","PREBURST90_META")) | |
return(search_text) | |
} | |
# Scrape the data hub----------------------- | |
# See details at https://data.arr-software.org/about | |
hub_data <- RCurl::getURI(glue("https://data.arr-software.org/?lon_coord={lon}&lat_coord={lat}&type=text&Preburst=1&OtherPreburst=1")) | |
# Get search text | |
search_text <- .Get_start_end(percentile) | |
search_pattern = str_c("(?<=\\[", search_text[1],"\\])(.*)(?=\\[", search_text[2],"\\])") # Construct pattern for regex | |
pb_info <- str_extract(hub_data, regex(search_pattern, dotall = TRUE)) # extract the required pre-burst data from hub_data | |
if(type == 'depth'){ | |
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas | |
x3 <- str_replace_all(x2, '\\([^)]*\\)', '') # This removes brackets and everything between brackets | |
x3 <- str_split(x3, pattern = ",") # split at commas | |
x4 <- unlist(x3) # convert to vector | |
x5 <- x4[c(-1,-length(x4))] # remove first and last value | |
x5 <- x5[c(-1:-7)] # remove first seven values | |
x5 <- as.numeric(x5) | |
x6 <- matrix(x5, ncol = 7, byrow = TRUE) | |
colnames(x6) <- c('dur_min', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' ) | |
pb_depth <- as_tibble(x6, .name_repair = 'check_unique') | |
pb_depth <- add_column(pb_depth, dur_hour = pb_depth$dur_min/60, .before = 2) | |
return(pb_depth) | |
} | |
if(type == 'ratio'){ | |
x2 <- str_replace_all(pb_info, '\n', ',') # replace all the \n strings with commas so we can split at the commas | |
x3 <- str_extract_all(x2, '\\([^)]*\\)') # keeps brackets and everything in the brackets | |
x3 <- unlist(x3) | |
x4 <- str_replace_all(x3, '[()]', '') # remove the brackets | |
x5 <- x4[!x4 %in% c("h", "%")] # get rid of other things we don't need | |
x5 <- as.numeric(x5) | |
x6 <- matrix(x5, ncol = 7, byrow = TRUE) # matrix of rainfall ratios | |
colnames(x6) <- c('dur_hour', 'AEP_50','AEP_20','AEP_10','AEP_5','AEP_2','AEP_1' ) | |
pb_ratio <- as_tibble(x6, .name_repair = 'check_unique') # confert to data frame | |
pb_ratio <- add_column(pb_ratio, dur_min = pb_ratio$dur_hour*60, .before = 1) # add a column of minutes for consistency | |
return(pb_ratio) | |
} | |
stop(str_c("Error in function Get_preburst. Expecting type = 'depth', or 'ratio'. Type = ", type )) | |
} | |
# Example | |
# Get_preburst(percentile = 90, type = 'depth', lat = -36.9, lon = 144.36) | |
# # A tibble: 11 x 8 | |
# dur_min dur_hour AEP_50 AEP_20 AEP_10 AEP_5 AEP_2 AEP_1 | |
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> | |
# 1 60 1 22.3 25.2 27.1 29 30.6 31.8 | |
# 2 90 1.5 24.2 25.4 26.1 26.9 31.5 35 | |
# 3 120 2 27.2 33.4 37.5 41.5 43.6 45.2 | |
# 4 180 3 23.3 27.1 29.6 32 43.4 52 | |
# 5 360 6 19.5 24.3 27.5 30.6 42.5 51.4 | |
# 6 720 12 12.8 20.3 25.3 30.1 34.4 37.6 | |
# 7 1080 18 15.6 18.1 19.7 21.3 25.7 29 | |
# 8 1440 24 14.6 18.8 21.6 24.2 24.3 24.4 | |
# 9 2160 36 9 11.2 12.6 14 22.3 28.5 | |
# 10 2880 48 1.1 2.7 3.7 4.7 10.7 15.1 | |
# 11 4320 72 0.2 1.9 3 4 12.5 18.9 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment