Last active
June 5, 2023 00:11
-
-
Save TonyLadson/a4bde212add20f8494de479740f39afd to your computer and use it in GitHub Desktop.
Scraping the RFFE see hthttps://tonyladson.wordpress.com/2016/07/04/scraping-the-rffe/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RCurl) | |
library(jsonlite) | |
library(stringr) | |
library(dplyr) | |
library(ggplot2) | |
rffe.data <- postForm("http://rffe.arr.org.au/", | |
catchment_name = "test1", | |
lato = "-37", | |
lono = "148", | |
latc = "-37.2", | |
lonc = "148.2", | |
area = "100" | |
) | |
rffe.data <- as.character(rffe.data) # convert to text | |
# separate out parts with useful information, everything between[{ }] | |
# There are two separate parts | |
# 1. Results from gauges in the region of influence | |
# 2. Results at the chosen location | |
# grab the required parts using regex to get all the text between | |
# [{ }] | |
x <- stringr::str_match_all(rffe.data, '\\[\\{.*\\}\\]' ) | |
gauges.ffa.JSON <- x[[1]][1,] | |
RFFE.res <-x[[1]][2,] | |
gauges.ffa.JSON | |
# Convert to data frame | |
gauges.ffa.df <- jsonlite::fromJSON(gauges.ffa.JSON, flatten = TRUE) | |
# Assemble a data frame with the pieces we need | |
gauges.ffa <- gauges.ffa.df %>% | |
select(station_id, area, | |
record.length = sflength, | |
latc, lonc, lato, lono, | |
bcf, i2_6h, i50_6h, | |
mar, | |
shape.factor = sf, | |
flow_1pc = q1, | |
flow_2pc = q2, | |
flow_5pc = q5, | |
flow_10pc = q10, | |
flow_20pc = q20, | |
flow_50pc = q50, | |
flow_1pc_LCL = lower1, | |
flow_2pc_LCL = lower2, | |
flow_5pc_LCL = lower5, | |
flow_10pc_LCL = lower10, | |
flow_20pc_LCL = lower20, | |
flow_50pc_LCL = lower50, | |
flow_1pc_UCL = upper1, | |
flow_2pc_UCL = upper2, | |
flow_5pc_UCL = upper5, | |
flow_10pc_UCL = upper10, | |
flow_20pc_UCL = upper20, | |
flow_50pc_UCL = upper50, | |
region.name, | |
region.number, | |
region.version, | |
statistics.mean = statistics.mean, | |
statistics.mean_se, | |
statistics.skew, | |
statistics.skew_se, | |
statistics.stdev, | |
statistics.stdev_se) | |
# add the correlations, need to extract from nested loop | |
stat.correlation <-t(sapply(gauges.ffa.df$statistics.correlations, unlist)) | |
stat.correlation <- as.data.frame(stat.correlation) | |
stat.correlation <- stat.correlation[ ,c(2,4,5)] # don't need to have the columns with all '1' | |
names(stat.correlation) = c('cor_mean_sd', 'cor_mean_skew', 'cor_sd_skew') | |
# final data frame | |
gauges.ffa <- cbind(gauges.ffa, stat.correlation) # add to gauges.ffa | |
gauges.ffa | |
# Now deal with the RFFE.res part (the results for the location of interest) | |
RFFE.res | |
RFFE.res <- str_replace_all(RFFE.res, "[{\\[\\]}]", "") # remove braces and square brackets | |
RFFE.res <- str_replace_all(RFFE.res, c(":" = "", # remove characters that are not required | |
"'"= "", | |
"aep" = "", | |
"lower_limit"= "", | |
"upper_limit" = "", | |
"flow" = "", | |
"\\s+" = "")) # remove spaces | |
RFFE.res <- unlist(str_split(RFFE.res, ',')) # split at commas | |
RFFE.res <- as.data.frame(matrix(as.numeric(RFFE.res), ncol = 4, byrow = TRUE)) # convert to a data frame | |
names(RFFE.res) <- c('ARI', 'upper_limit', 'lower_limit', 'flow') # name columns | |
str(RFFE.res) | |
RFFE.res | |
# plot | |
# Create a theme | |
MyTheme = theme_bw() + | |
theme( | |
panel.background = element_rect(fill="gray98"), | |
axis.title.x = element_text(colour="grey20", size=14, margin=margin(20,0,0,0)), | |
axis.text.x = element_text(colour="grey20",size=12), | |
axis.title.y = element_text(colour="grey20",size=14, margin = margin(0,20,0,0)), | |
axis.text.y = element_text(colour="grey20",size=12), | |
legend.title = element_text(colour="grey20",size=12), | |
plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm"), # top, right, bottom, left | |
panel.grid.minor = element_line(colour="grey90", size=0.2), | |
panel.grid.major = element_line(colour="grey90", size=0.4)) | |
my.x.labels = c(2, 5, 10, 20, 50,100) | |
my.x.breaks <- qnorm(1/my.x.labels, lower.tail = FALSE) | |
# set up the breaks we need | |
my.y.breaks <- c(10, 100, 1000) | |
my.y.breaks | |
my.y.minor_breaks <- c(1:10, seq(10,100,10), seq(100, 1000, 100)) | |
# add AEP and zscore | |
RFFE.res %>% | |
mutate(AEP = 1/ARI) %>% | |
mutate(z.score = qnorm(AEP, lower.tail = FALSE)) %>% | |
ggplot() + | |
geom_line(aes(x = z.score, y = flow), linetype = 1, colour = 'blue') + | |
geom_point(aes(x = z.score, y = flow), colour = 'black', size = 3) + | |
geom_line(aes(x = z.score, y = lower_limit), linetype = 2, colour = 'blue') + | |
geom_line(aes(x = z.score, y = upper_limit), linetype = 2, colour = 'blue') + | |
scale_x_continuous(name = 'AEP 1 in Y years', breaks = my.x.breaks, labels = my.x.labels ) + | |
scale_y_log10(name = bquote('Peak discharge (m' ^3 *'s' ^-1 *')'), | |
limits = c(1,1000), | |
breaks = my.y.breaks, | |
labels = my.y.breaks, | |
minor_breaks = my.y.minor_breaks) + | |
MyTheme |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment