Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active June 5, 2023 00:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/a4bde212add20f8494de479740f39afd to your computer and use it in GitHub Desktop.
Save TonyLadson/a4bde212add20f8494de479740f39afd to your computer and use it in GitHub Desktop.
Scraping the RFFE see hthttps://tonyladson.wordpress.com/2016/07/04/scraping-the-rffe/
library(RCurl)
library(jsonlite)
library(stringr)
library(dplyr)
library(ggplot2)
rffe.data <- postForm("http://rffe.arr.org.au/",
catchment_name = "test1",
lato = "-37",
lono = "148",
latc = "-37.2",
lonc = "148.2",
area = "100"
)
rffe.data <- as.character(rffe.data) # convert to text
# separate out parts with useful information, everything between[{ }]
# There are two separate parts
# 1. Results from gauges in the region of influence
# 2. Results at the chosen location
# grab the required parts using regex to get all the text between
# [{ }]
x <- stringr::str_match_all(rffe.data, '\\[\\{.*\\}\\]' )
gauges.ffa.JSON <- x[[1]][1,]
RFFE.res <-x[[1]][2,]
gauges.ffa.JSON
# Convert to data frame
gauges.ffa.df <- jsonlite::fromJSON(gauges.ffa.JSON, flatten = TRUE)
# Assemble a data frame with the pieces we need
gauges.ffa <- gauges.ffa.df %>%
select(station_id, area,
record.length = sflength,
latc, lonc, lato, lono,
bcf, i2_6h, i50_6h,
mar,
shape.factor = sf,
flow_1pc = q1,
flow_2pc = q2,
flow_5pc = q5,
flow_10pc = q10,
flow_20pc = q20,
flow_50pc = q50,
flow_1pc_LCL = lower1,
flow_2pc_LCL = lower2,
flow_5pc_LCL = lower5,
flow_10pc_LCL = lower10,
flow_20pc_LCL = lower20,
flow_50pc_LCL = lower50,
flow_1pc_UCL = upper1,
flow_2pc_UCL = upper2,
flow_5pc_UCL = upper5,
flow_10pc_UCL = upper10,
flow_20pc_UCL = upper20,
flow_50pc_UCL = upper50,
region.name,
region.number,
region.version,
statistics.mean = statistics.mean,
statistics.mean_se,
statistics.skew,
statistics.skew_se,
statistics.stdev,
statistics.stdev_se)
# add the correlations, need to extract from nested loop
stat.correlation <-t(sapply(gauges.ffa.df$statistics.correlations, unlist))
stat.correlation <- as.data.frame(stat.correlation)
stat.correlation <- stat.correlation[ ,c(2,4,5)] # don't need to have the columns with all '1'
names(stat.correlation) = c('cor_mean_sd', 'cor_mean_skew', 'cor_sd_skew')
# final data frame
gauges.ffa <- cbind(gauges.ffa, stat.correlation) # add to gauges.ffa
gauges.ffa
# Now deal with the RFFE.res part (the results for the location of interest)
RFFE.res
RFFE.res <- str_replace_all(RFFE.res, "[{\\[\\]}]", "") # remove braces and square brackets
RFFE.res <- str_replace_all(RFFE.res, c(":" = "", # remove characters that are not required
"'"= "",
"aep" = "",
"lower_limit"= "",
"upper_limit" = "",
"flow" = "",
"\\s+" = "")) # remove spaces
RFFE.res <- unlist(str_split(RFFE.res, ',')) # split at commas
RFFE.res <- as.data.frame(matrix(as.numeric(RFFE.res), ncol = 4, byrow = TRUE)) # convert to a data frame
names(RFFE.res) <- c('ARI', 'upper_limit', 'lower_limit', 'flow') # name columns
str(RFFE.res)
RFFE.res
# plot
# Create a theme
MyTheme = theme_bw() +
theme(
panel.background = element_rect(fill="gray98"),
axis.title.x = element_text(colour="grey20", size=14, margin=margin(20,0,0,0)),
axis.text.x = element_text(colour="grey20",size=12),
axis.title.y = element_text(colour="grey20",size=14, margin = margin(0,20,0,0)),
axis.text.y = element_text(colour="grey20",size=12),
legend.title = element_text(colour="grey20",size=12),
plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm"), # top, right, bottom, left
panel.grid.minor = element_line(colour="grey90", size=0.2),
panel.grid.major = element_line(colour="grey90", size=0.4))
my.x.labels = c(2, 5, 10, 20, 50,100)
my.x.breaks <- qnorm(1/my.x.labels, lower.tail = FALSE)
# set up the breaks we need
my.y.breaks <- c(10, 100, 1000)
my.y.breaks
my.y.minor_breaks <- c(1:10, seq(10,100,10), seq(100, 1000, 100))
# add AEP and zscore
RFFE.res %>%
mutate(AEP = 1/ARI) %>%
mutate(z.score = qnorm(AEP, lower.tail = FALSE)) %>%
ggplot() +
geom_line(aes(x = z.score, y = flow), linetype = 1, colour = 'blue') +
geom_point(aes(x = z.score, y = flow), colour = 'black', size = 3) +
geom_line(aes(x = z.score, y = lower_limit), linetype = 2, colour = 'blue') +
geom_line(aes(x = z.score, y = upper_limit), linetype = 2, colour = 'blue') +
scale_x_continuous(name = 'AEP 1 in Y years', breaks = my.x.breaks, labels = my.x.labels ) +
scale_y_log10(name = bquote('Peak discharge (m' ^3 *'s' ^-1 *')'),
limits = c(1,1000),
breaks = my.y.breaks,
labels = my.y.breaks,
minor_breaks = my.y.minor_breaks) +
MyTheme
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment