Last active
March 25, 2019 00:57
-
-
Save TonyLadson/5b01838fef1140293397e23eebe12079 to your computer and use it in GitHub Desktop.
Fitting a probability model to POT data. See tonyladson.wordpress.com/2019/03/25/fitting-a-probability-model-to-pot-data/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages --------------------------------------------------------------- | |
library(boot) | |
library(tidyverse) | |
# Constants --------------------------------------------------------------- | |
# Standard probabilities and Exceedances per year | |
standard_probs <- c(0.5, 0.2, 0.1, 0.05, 0.02, 0.01) | |
standard_EY <- c(1, 0.69, 0.5, 0.22, 0.2, 0.11, 0.05, 0.02, 0.01) | |
# From ARR2016 Figure 1.2.1, Book 1, Chapter 2.2.5 | |
# Functions --------------------------------------------------------------- | |
# Function to calculate the second L moment | |
# From Wang (1996) | |
# Wang, Q. J. (1996). "Direct sample estimates of L moments." | |
# Water Resources Research 32(12): 3617-3619. | |
L2 <- function(q){ | |
q <- sort(q) | |
n <- length(q) | |
0.5*(1/choose(n,2))*sum((0:(n-1) - (n-1):0)*q) | |
} | |
# Plotting position | |
# Cunnane plotting position formula | |
# Q is a vector of flood peaks | |
Plotting_cunnane <- function(Q){ | |
n <- length(Q) | |
(1:n - 0.4)/(n + 0.2) | |
} | |
# Exponential distribution | |
# Exceedance probability associated with a discharge q | |
# They 1 - CDF from Table 3.2.1 of Book 2, Australian Rainfall and Runoff 2016 | |
Prob_q <- function(q, beta, q_star){ | |
exp(-(q - q_star)/beta) | |
} | |
# Discharge given an exceedance probability | |
Q_prob <- function(p, beta, q_star){ | |
q_star - beta*(log( p)) | |
} | |
# Convert AEP to EY | |
AEP2EY <- function(AEP) { | |
-log(1-AEP) | |
} | |
# Convert EY back to AEP | |
EY2AEP <- function(EY) { | |
(exp(EY) - 1)/exp(EY) | |
} | |
#AEP2EY(0.99) | |
#EY2AEP(4.6) | |
EY2AEP(2) | |
# Data -------------------------------------------------------------------- | |
# Partial series from the Styx River at Jeogla | |
styx <- c(74.0, 79.9, 85.2, 88.6, 91.7, 92.2, 92.2, 98.0, 105.0, 108.0, 111.0, | |
117.0, 117.0, 118.0, 119.0, 126.0, 129.0, 129.0, 134.0, 149.0, 150.0, 164.0, | |
186.0, 190.0, 194.0, 196.0, 206.0, 220.0, 221.0, 235.0, 238.0, 255.0, 255.0, | |
258.0, 283.0, 294.0, 300.0, 301.0, 309.0, 315.0, 405.0, 411.0, 436.0, 513.0, 521.0, 541.0, 878.0) | |
# # Write to clipboard | |
# clip <- pipe("pbcopy", "w") | |
# write.table(styx, file=clip, sep = '\t', row.names = FALSE) | |
# close(clip) | |
length(styx) #47 | |
# Calculate first two L moments | |
styx_L1 <- mean(styx) #226.36 | |
styx_L2 <- L2(styx) #79.12 | |
# Exponential parameters | |
styx_beta <- 2*styx_L2 #158.240 | |
styx_q_star <- styx_L1 - styx_beta #68.11748 | |
# Bootstrap estimates of parameter uncertainty ---------------------------- | |
# Confidence interval | |
# Create a function to estimate quantiles. Function is in a form | |
# that can be used for bootstraping | |
# pot_series is vector of independent flows | |
# indices provides an index to the vector of flows and is used for bootstrapping | |
# see boot::boot | |
# We need a function, that when applied to the data, return the statistics of interest, in this case the parameter estimates | |
# the second argement of the function facilitates the randome sampling of the data | |
# Exponential parameters | |
Exp_param <- function(pot_series, index){ | |
# Create bootstap sample | |
pot_series_rnd <- pot_series[index] | |
beta <- 2*L2(pot_series_rnd) | |
q_star <- mean(pot_series_rnd) - beta | |
out <- set_names(c(beta, q_star), nm = c('beta','q_star')) | |
return(out) | |
} | |
# Test | |
Exp_param(pot_series = styx, index = 1:47) | |
# should be the same as the orignal estimates, and is | |
# beta q_star | |
# 158.23996 68.11748 | |
# Bootstrap | |
param_errors_boot <- boot(data = styx, statistic = Exp_param, R = 5000, sim = 'ordinary', stype = 'i') | |
summary(param_errors_boot) | |
cor(param_errors_boot$t) # Correlation | |
# Creat a dataframe for plotting | |
param_errors_df <- as_data_frame(param_errors_boot$t) | |
# Create plot (web version) | |
p <- param_errors_df %>% | |
ggplot(aes(x = V1, y = V2)) + | |
geom_point(size = 0.1) + | |
scale_x_continuous(name = 'beta') + | |
scale_y_continuous(name = bquote('q'['*'])) + | |
stat_ellipse(colour = 'red') + # 95% confidence limit elipse | |
theme_gray(base_size = 7) | |
ggsave(file.path('../figures', 'param_uncertainty.png'), p, width = 4, height = 3) | |
# 95% Confidence intervals | |
boot.ci(param_errors_boot, index = c(1)) | |
boot.ci(param_errors_boot, index = c(2)) | |
# Estimate quantiles ------------------------------------------------------ | |
# fitted quantiles(w) as a function of EY | |
Flood_quantile <- function(EY, nu = 1, beta, q_star){ | |
q_star - beta*log(EY/nu) | |
} | |
# Estimate flow for EY = 0.01, approx. the 1% floow | |
Flood_quantile(0.01, nu = 1, beta = styx_beta, q_star = styx_q_star) | |
# [1] 796.8394 | |
# Estimate flows for all standard EY values | |
# Standard EY | |
res <- data_frame( | |
EY = standard_EY, | |
AEP = EY2AEP(standard_EY), | |
AEP_1inX = 1/AEP, | |
ARI = 1/standard_EY, | |
Q = Flood_quantile(EY, nu = 1, beta = styx_beta, q_star = styx_q_star)) | |
res | |
# # A tibble: 9 x 5 | |
# EY AEP AEP_1inX ARI Q | |
# <dbl> <dbl> <dbl> <dbl> <dbl> | |
# 1 1 0.632 1.58 1 68.1 | |
# 2 0.69 0.498 2.01 1.45 127. | |
# 3 0.5 0.393 2.54 2 178. | |
# 4 0.22 0.197 5.06 4.55 308. | |
# 5 0.2 0.181 5.52 5 323. | |
# 6 0.11 0.104 9.60 9.09 417. | |
# 7 0.05 0.0488 20.5 20 542. | |
# 8 0.02 0.0198 50.5 50 687. | |
# 9 0.01 0.00995 101. 100 797. | |
# # Write to clipboard | |
# clip <- pipe("pbcopy", "w") | |
# write.table(res, file=clip, sep = '\t', row.names = FALSE) | |
# close(clip) | |
# Function to boostrap flood quantiles to determine confidence intervalues | |
Flood_quantile_boot <- function(pot_series, indices, EY, nu = 1){ | |
pot_series <- pot_series[indices] | |
# L moments | |
l1 <- mean(pot_series) | |
l2 <- L2(pot_series) | |
# Exponential parameters | |
beta <- 2*l2 #158.240 | |
q_star <- l1 - beta #68.11748 | |
# Quantile | |
q_star - beta*log(EY/nu) | |
} | |
# test | |
Flood_quantile_boot(pot_series = styx, indices = 1:47, EY = 0.01) | |
# [1] 796.8394 1% flood, as expected. | |
Quantile_ci <- function(EY, pot_series){ | |
boot.out <- boot(data = pot_series, | |
statistic = Flood_quantile_boot, | |
R = 5000, | |
EY = EY) | |
ci <- boot.ci(boot.out, type = 'bca') | |
out <- data_frame(EY = EY, | |
est = ci$t0, | |
lwr = ci$bca[4], | |
upr = ci$bca[5]) | |
out | |
} | |
# Confidence limits | |
# Test 1% CL | |
Quantile_ci(EY = 0.01, pot_series = styx) | |
# # A tibble: 1 x 4 | |
# EY est lwr upr | |
# <dbl> <dbl> <dbl> <dbl> | |
# 1 0.01 797. 626. 1129. | |
# Confidence limits for the standard EY Values | |
conf_limits_bootstrap <- map_df(.x = standard_EY, | |
.f = Quantile_ci, | |
pot_series = styx) | |
conf_limits_bootstrap | |
# # A tibble: 9 x 4 | |
# EY est lwr upr | |
# <dbl> <dbl> <dbl> <dbl> | |
# 1 1 68.1 34.7 90.3 | |
# 2 0.69 127. 106. 151. | |
# 3 0.5 178. 150. 214. | |
# 4 0.22 308. 254. 396. | |
# 5 0.2 323. 266. 426. | |
# 6 0.11 417. 338. 559. | |
# 7 0.05 542. 434. 750. | |
# 8 0.02 687. 547. 955. | |
# 9 0.01 797. 628. 1132. | |
# Plot -------------------------------------------------------------------- | |
styx_plot_data <- data_frame(Q = sort(styx, decreasing = TRUE), EY = Plotting_cunnane(sort(styx))) | |
styx_plot_data | |
# Transformation for secondary axis (showing ARI, inverse of EY) | |
my_trans <- ~1/. | |
p <- styx_plot_data %>% | |
ggplot(aes(x = EY, y = Q)) + | |
geom_point(size = 0.1) + | |
scale_x_continuous(name = 'EY (Exceedances per year)', | |
trans = 'log10', | |
limits = c(0.01, 1), | |
breaks = standard_EY, sec.axis = sec_axis(trans=my_trans, # Add a secondary axis showing ARI | |
name='ARI (years)', | |
breaks = c(100, 50, 20, 10, 5, 2, 1))) + | |
scale_y_continuous(name = bquote('Peak flow ('*m^3*s^-1*')'), | |
breaks = seq(0,1200,100), | |
limits = c(NA, 1200)) + | |
theme_gray(base_size = 7) + | |
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) | |
p | |
# Calculate and add fitted values | |
EY_seq <- seq(0.01, 1, length.out = 100) | |
fit <- data_frame(EY_seq, flow = Flood_quantile (EY = EY_seq, | |
beta = styx_beta, | |
q_star = styx_q_star) ) | |
fit | |
p <- p + geom_line(data = fit,aes(x = EY_seq, y = flow), size = 0.3) | |
p | |
# Calculate confidence intervals for 20 points along the xaxis | |
conf_limits <- map_df(.x = seq(0.01, 1, length.out = 20), | |
.f = Quantile_ci, | |
pot_series = styx) | |
# Add to graph | |
p <- p + | |
geom_line(data = conf_limits, aes(x = EY, y = lwr), linetype = 'dashed', size = 0.3) + | |
geom_line(data = conf_limits, aes(x = EY, y = upr), linetype = 'dashed', size = 0.3) | |
# plot | |
p | |
ggsave(file.path('../figures', 'fit_plot.png'), p, width = 4, height = 3) | |
# Alternative version, increasing to the right | |
styx_plot_data <- data_frame(Q = sort(styx, decreasing = TRUE), ARI =1/ Plotting_cunnane(sort(styx))) | |
styx_plot_data | |
# transformation for secondary axis | |
my_trans <- ~1/. | |
p <- styx_plot_data %>% | |
ggplot(aes(x = ARI, y = Q)) + | |
geom_point(size = 0.1) + | |
scale_x_continuous(name = 'ARI (years)', | |
trans = 'log10', | |
limits = c(1,100), | |
breaks = c(1,2,5,10,20,50,100), | |
sec.axis = sec_axis(trans=my_trans, # Add a secondary axis showing ARI | |
name='EY', | |
breaks = standard_EY)) + | |
scale_y_continuous(name = bquote('Peak flow ('*m^3*s^-1*')'), | |
breaks = seq(0,1200,100), | |
limits = c(NA, 1200)) + | |
theme_gray(base_size = 7) + | |
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) | |
p | |
# Calculate and add fitted values | |
EY_seq <- seq(0.01, 1, length.out = 100) | |
ARI_seq = 1/EY_seq | |
fit <- data_frame(ARI_seq, flow = Flood_quantile (EY = EY_seq, | |
beta = styx_beta, | |
q_star = styx_q_star) ) | |
fit | |
p <- p + geom_line(data = fit, aes(x = ARI_seq, y = flow), size = 0.3) | |
p | |
# Calculate confidence intervals for 20 points along the xaxis | |
# These take a while to calculate using bootstrapping so I'm limiting the number of points | |
conf_limits <- map_df(.x = seq(0.01, 1, length.out = 20), | |
.f = Quantile_ci, | |
pot_series = styx) | |
# Add to graph | |
p <- p + | |
geom_line(data = conf_limits, aes(x = 1/EY, y = lwr), linetype = 'dashed', size = 0.3) + | |
geom_line(data = conf_limits, aes(x = 1/EY, y = upr), linetype = 'dashed', size = 0.3) | |
# plot | |
p | |
ggsave(file.path('../figures', 'fit_plot_ARI.png'), p, width = 4, height = 3) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment