TonyLadson/Pot_fit.R

## Pot_fit.R
# Packages ---------------------------------------------------------------

library(boot)
library(tidyverse)


# Constants ---------------------------------------------------------------

# Standard probabilities and Exceedances per year

standard_probs <- c(0.5, 0.2, 0.1, 0.05, 0.02, 0.01)
standard_EY <- c(1, 0.69, 0.5, 0.22, 0.2, 0.11, 0.05, 0.02, 0.01)
# From ARR2016 Figure 1.2.1, Book 1, Chapter 2.2.5


# Functions ---------------------------------------------------------------

# Function to calculate the second L moment
# From Wang (1996)
# Wang, Q. J. (1996). "Direct sample estimates of L moments."
# Water Resources Research 32(12): 3617-3619.


L2 <- function(q){
  q <- sort(q)
  n <- length(q)
  0.5*(1/choose(n,2))*sum((0:(n-1) - (n-1):0)*q)
}

# Plotting position

# Cunnane plotting position formula
# Q is a vector of flood peaks

Plotting_cunnane <- function(Q){
  n <- length(Q)
  (1:n - 0.4)/(n + 0.2)

}


# Exponential distribution

# Exceedance probability associated with a discharge q
# They 1 - CDF from Table 3.2.1 of Book 2, Australian Rainfall and Runoff 2016

Prob_q <- function(q, beta, q_star){

  exp(-(q - q_star)/beta)

}

# Discharge given an exceedance probability

Q_prob <- function(p, beta, q_star){

  q_star - beta*(log( p))

}

# Convert AEP to EY

AEP2EY <- function(AEP) {
  -log(1-AEP)
}

# Convert EY back to AEP
EY2AEP <- function(EY) {
  (exp(EY) - 1)/exp(EY)
}

#AEP2EY(0.99)
#EY2AEP(4.6)
EY2AEP(2)


# Data --------------------------------------------------------------------


# Partial series from the Styx River at Jeogla
styx <- c(74.0, 79.9,  85.2,  88.6,  91.7,  92.2,  92.2,  98.0, 105.0, 108.0, 111.0,
          117.0, 117.0, 118.0, 119.0, 126.0, 129.0, 129.0, 134.0, 149.0, 150.0, 164.0,
          186.0, 190.0, 194.0, 196.0, 206.0, 220.0, 221.0, 235.0, 238.0, 255.0, 255.0,
          258.0, 283.0, 294.0, 300.0, 301.0, 309.0, 315.0, 405.0, 411.0, 436.0, 513.0, 521.0, 541.0, 878.0)


# # Write to clipboard
# clip <- pipe("pbcopy", "w")
# write.table(styx, file=clip, sep = '\t', row.names = FALSE)
# close(clip)

length(styx) #47

# Calculate first two L moments

styx_L1  <-  mean(styx) #226.36
styx_L2 <- L2(styx)  #79.12


# Exponential parameters

styx_beta <- 2*styx_L2 #158.240
styx_q_star  <-  styx_L1 - styx_beta #68.11748


# Bootstrap estimates of parameter uncertainty ----------------------------


# Confidence interval

# Create a function to estimate quantiles.  Function is in a form
# that can be used for bootstraping

# pot_series is vector of independent flows
# indices provides an index to the vector of flows and is used for bootstrapping
# see boot::boot


# We need a function, that when applied to the data, return the statistics of interest, in this case the parameter estimates
# the second argement of the function facilitates the randome sampling of the data

# Exponential parameters

Exp_param <- function(pot_series, index){

  # Create bootstap sample
  pot_series_rnd <- pot_series[index]

  beta <- 2*L2(pot_series_rnd)
  q_star  <-  mean(pot_series_rnd) - beta

  out <- set_names(c(beta, q_star), nm = c('beta','q_star'))
  return(out)


}


# Test

Exp_param(pot_series = styx, index = 1:47)
# should be the same as the orignal estimates, and is
# beta    q_star
# 158.23996  68.11748

# Bootstrap
param_errors_boot <- boot(data = styx, statistic = Exp_param, R = 5000, sim = 'ordinary', stype = 'i')
summary(param_errors_boot)

cor(param_errors_boot$t) # Correlation

# Creat a dataframe for plotting
param_errors_df <- as_data_frame(param_errors_boot$t)

# Create plot (web version)
p <- param_errors_df %>%
  ggplot(aes(x = V1, y = V2)) +
    geom_point(size = 0.1) +
  scale_x_continuous(name = 'beta') +
  scale_y_continuous(name = bquote('q'['*'])) +
  stat_ellipse(colour = 'red') + # 95% confidence limit elipse
  theme_gray(base_size = 7)

ggsave(file.path('../figures', 'param_uncertainty.png'), p, width = 4, height = 3)

# 95% Confidence intervals
boot.ci(param_errors_boot, index = c(1))
boot.ci(param_errors_boot, index = c(2))


# Estimate quantiles ------------------------------------------------------


# fitted quantiles(w) as a function of EY

Flood_quantile <- function(EY, nu = 1, beta, q_star){
  q_star - beta*log(EY/nu)
}


# Estimate flow for EY = 0.01, approx. the 1% floow
Flood_quantile(0.01, nu = 1, beta = styx_beta, q_star = styx_q_star)
# [1] 796.8394


# Estimate flows for all standard EY values
# Standard EY


res <- data_frame(
  EY = standard_EY,
  AEP = EY2AEP(standard_EY),
  AEP_1inX = 1/AEP,
  ARI = 1/standard_EY,
  Q = Flood_quantile(EY, nu = 1, beta = styx_beta, q_star = styx_q_star))

res
# # A tibble: 9 x 5
# EY     AEP AEP_1inX    ARI     Q
# <dbl>   <dbl>    <dbl>  <dbl> <dbl>
# 1  1    0.632       1.58   1     68.1
# 2  0.69 0.498       2.01   1.45 127.
# 3  0.5  0.393       2.54   2    178.
# 4  0.22 0.197       5.06   4.55 308.
# 5  0.2  0.181       5.52   5    323.
# 6  0.11 0.104       9.60   9.09 417.
# 7  0.05 0.0488     20.5   20    542.
# 8  0.02 0.0198     50.5   50    687.
# 9  0.01 0.00995   101.   100    797.

# # Write to clipboard
# clip <- pipe("pbcopy", "w")
# write.table(res, file=clip, sep = '\t', row.names = FALSE)
# close(clip)


# Function to boostrap flood quantiles to determine confidence intervalues

Flood_quantile_boot <- function(pot_series, indices, EY, nu = 1){

  pot_series <- pot_series[indices]

  # L moments
  l1  <-  mean(pot_series)
  l2 <- L2(pot_series)


  # Exponential parameters

  beta <- 2*l2 #158.240
  q_star  <-  l1 - beta #68.11748

  # Quantile
  q_star - beta*log(EY/nu)
}


# test
Flood_quantile_boot(pot_series = styx, indices = 1:47, EY = 0.01)
# [1] 796.8394 1% flood, as expected.


Quantile_ci <- function(EY, pot_series){

  boot.out <- boot(data = pot_series,
                   statistic = Flood_quantile_boot,
                   R = 5000,
                   EY = EY)

  ci <- boot.ci(boot.out, type = 'bca')

  out <- data_frame(EY = EY,
                    est = ci$t0,
                    lwr = ci$bca[4],
                    upr = ci$bca[5])
  out

}

# Confidence limits

# Test 1% CL
Quantile_ci(EY = 0.01, pot_series = styx)

# # A tibble: 1 x 4
# EY   est   lwr   upr
# <dbl> <dbl> <dbl> <dbl>
#   1  0.01  797.  626. 1129.


# Confidence limits for the standard EY Values
conf_limits_bootstrap <- map_df(.x = standard_EY,
                                .f = Quantile_ci,
                                pot_series = styx)

conf_limits_bootstrap


# # A tibble: 9 x 4
# EY   est   lwr    upr
# <dbl> <dbl> <dbl>  <dbl>
#   1  1     68.1  34.7   90.3
# 2  0.69 127.  106.   151.
# 3  0.5  178.  150.   214.
# 4  0.22 308.  254.   396.
# 5  0.2  323.  266.   426.
# 6  0.11 417.  338.   559.
# 7  0.05 542.  434.   750.
# 8  0.02 687.  547.   955.
# 9  0.01 797.  628.  1132.


# Plot --------------------------------------------------------------------


styx_plot_data <- data_frame(Q = sort(styx, decreasing = TRUE), EY = Plotting_cunnane(sort(styx)))
styx_plot_data

# Transformation for secondary axis (showing ARI, inverse of EY)
my_trans <- ~1/.


p <- styx_plot_data %>%
  ggplot(aes(x = EY, y = Q)) +
  geom_point(size = 0.1) +
  scale_x_continuous(name = 'EY (Exceedances per year)',
                     trans = 'log10',
                     limits = c(0.01, 1),
                     breaks = standard_EY, sec.axis = sec_axis(trans=my_trans, # Add a secondary axis showing ARI
                                                               name='ARI (years)',
                                                               breaks = c(100, 50, 20, 10, 5, 2, 1))) +
  scale_y_continuous(name = bquote('Peak flow ('*m^3*s^-1*')'),
                     breaks = seq(0,1200,100),
                     limits = c(NA, 1200)) +
  theme_gray(base_size = 7) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

p

# Calculate and add fitted values

EY_seq <- seq(0.01, 1, length.out = 100)

fit <- data_frame(EY_seq, flow =  Flood_quantile (EY = EY_seq,
                                         beta = styx_beta,
                                         q_star = styx_q_star) )


fit

p <- p + geom_line(data = fit,aes(x = EY_seq, y = flow), size = 0.3)

p
# Calculate confidence intervals for 20 points along the xaxis

conf_limits <- map_df(.x = seq(0.01, 1, length.out = 20),
                      .f = Quantile_ci,
                      pot_series = styx)

# Add to graph
p <- p +
  geom_line(data = conf_limits, aes(x = EY, y = lwr), linetype = 'dashed', size = 0.3) +
  geom_line(data = conf_limits, aes(x = EY, y = upr), linetype = 'dashed', size = 0.3)


# plot
p


ggsave(file.path('../figures', 'fit_plot.png'), p, width = 4, height = 3)


# Alternative version, increasing to the right


styx_plot_data <- data_frame(Q = sort(styx, decreasing = TRUE), ARI =1/ Plotting_cunnane(sort(styx)))

styx_plot_data

# transformation for secondary axis
my_trans <- ~1/.


p <- styx_plot_data %>%
  ggplot(aes(x = ARI, y = Q)) +
  geom_point(size = 0.1) +
  scale_x_continuous(name = 'ARI (years)',
                     trans = 'log10',
                     limits = c(1,100),
                     breaks = c(1,2,5,10,20,50,100),
                     sec.axis = sec_axis(trans=my_trans, # Add a secondary axis showing ARI
                                                               name='EY',
                                                               breaks = standard_EY)) +
  scale_y_continuous(name = bquote('Peak flow ('*m^3*s^-1*')'),
                     breaks = seq(0,1200,100),
                     limits = c(NA, 1200)) +
  theme_gray(base_size = 7) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

p

# Calculate and add fitted values

EY_seq <- seq(0.01, 1, length.out = 100)
ARI_seq = 1/EY_seq


fit <- data_frame(ARI_seq, flow =  Flood_quantile (EY = EY_seq,
                                                  beta = styx_beta,
                                                  q_star = styx_q_star) )


fit

p <- p + geom_line(data = fit, aes(x = ARI_seq, y = flow), size = 0.3)

p
# Calculate confidence intervals for 20 points along the xaxis
# These take a while to calculate using bootstrapping so I'm limiting the number of points

conf_limits <- map_df(.x = seq(0.01, 1, length.out = 20),
                      .f = Quantile_ci,
                      pot_series = styx)

# Add to graph
p <- p +
  geom_line(data = conf_limits, aes(x = 1/EY, y = lwr), linetype = 'dashed', size = 0.3) +
  geom_line(data = conf_limits, aes(x = 1/EY, y = upr), linetype = 'dashed', size = 0.3)


# plot
p


ggsave(file.path('../figures', 'fit_plot_ARI.png'), p, width = 4, height = 3)
	# Packages ---------------------------------------------------------------

	library(boot)
	library(tidyverse)



	# Constants ---------------------------------------------------------------

	# Standard probabilities and Exceedances per year

	standard_probs <- c(0.5, 0.2, 0.1, 0.05, 0.02, 0.01)
	standard_EY <- c(1, 0.69, 0.5, 0.22, 0.2, 0.11, 0.05, 0.02, 0.01)
	# From ARR2016 Figure 1.2.1, Book 1, Chapter 2.2.5



	# Functions ---------------------------------------------------------------

	# Function to calculate the second L moment
	# From Wang (1996)
	# Wang, Q. J. (1996). "Direct sample estimates of L moments."
	# Water Resources Research 32(12): 3617-3619.


	L2 <- function(q){
	q <- sort(q)
	n <- length(q)
	0.5(1/choose(n,2))sum((0:(n-1) - (n-1):0)*q)
	}

	# Plotting position

	# Cunnane plotting position formula
	# Q is a vector of flood peaks

	Plotting_cunnane <- function(Q){
	n <- length(Q)
	(1:n - 0.4)/(n + 0.2)

	}


	# Exponential distribution

	# Exceedance probability associated with a discharge q
	# They 1 - CDF from Table 3.2.1 of Book 2, Australian Rainfall and Runoff 2016

	Prob_q <- function(q, beta, q_star){

	exp(-(q - q_star)/beta)

	}

	# Discharge given an exceedance probability

	Q_prob <- function(p, beta, q_star){

	q_star - beta*(log( p))

	}

	# Convert AEP to EY

	AEP2EY <- function(AEP) {
	-log(1-AEP)
	}

	# Convert EY back to AEP
	EY2AEP <- function(EY) {
	(exp(EY) - 1)/exp(EY)
	}

	#AEP2EY(0.99)
	#EY2AEP(4.6)
	EY2AEP(2)



	# Data --------------------------------------------------------------------



	# Partial series from the Styx River at Jeogla
	styx <- c(74.0, 79.9, 85.2, 88.6, 91.7, 92.2, 92.2, 98.0, 105.0, 108.0, 111.0,
	117.0, 117.0, 118.0, 119.0, 126.0, 129.0, 129.0, 134.0, 149.0, 150.0, 164.0,
	186.0, 190.0, 194.0, 196.0, 206.0, 220.0, 221.0, 235.0, 238.0, 255.0, 255.0,
	258.0, 283.0, 294.0, 300.0, 301.0, 309.0, 315.0, 405.0, 411.0, 436.0, 513.0, 521.0, 541.0, 878.0)


	# # Write to clipboard
	# clip <- pipe("pbcopy", "w")
	# write.table(styx, file=clip, sep = '\t', row.names = FALSE)
	# close(clip)

	length(styx) #47

	# Calculate first two L moments

	styx_L1 <- mean(styx) #226.36
	styx_L2 <- L2(styx) #79.12


	# Exponential parameters

	styx_beta <- 2*styx_L2 #158.240
	styx_q_star <- styx_L1 - styx_beta #68.11748



	# Bootstrap estimates of parameter uncertainty ----------------------------



	# Confidence interval

	# Create a function to estimate quantiles. Function is in a form
	# that can be used for bootstraping

	# pot_series is vector of independent flows
	# indices provides an index to the vector of flows and is used for bootstrapping
	# see boot::boot


	# We need a function, that when applied to the data, return the statistics of interest, in this case the parameter estimates
	# the second argement of the function facilitates the randome sampling of the data

	# Exponential parameters

	Exp_param <- function(pot_series, index){

	# Create bootstap sample
	pot_series_rnd <- pot_series[index]

	beta <- 2*L2(pot_series_rnd)
	q_star <- mean(pot_series_rnd) - beta

	out <- set_names(c(beta, q_star), nm = c('beta','q_star'))
	return(out)


	}


	# Test

	Exp_param(pot_series = styx, index = 1:47)
	# should be the same as the orignal estimates, and is
	# beta q_star
	# 158.23996 68.11748

	# Bootstrap
	param_errors_boot <- boot(data = styx, statistic = Exp_param, R = 5000, sim = 'ordinary', stype = 'i')
	summary(param_errors_boot)

	cor(param_errors_boot$t) # Correlation

	# Creat a dataframe for plotting
	param_errors_df <- as_data_frame(param_errors_boot$t)

	# Create plot (web version)
	p <- param_errors_df %>%
	ggplot(aes(x = V1, y = V2)) +
	geom_point(size = 0.1) +
	scale_x_continuous(name = 'beta') +
	scale_y_continuous(name = bquote('q'['*'])) +
	stat_ellipse(colour = 'red') + # 95% confidence limit elipse
	theme_gray(base_size = 7)

	ggsave(file.path('../figures', 'param_uncertainty.png'), p, width = 4, height = 3)

	# 95% Confidence intervals
	boot.ci(param_errors_boot, index = c(1))
	boot.ci(param_errors_boot, index = c(2))



	# Estimate quantiles ------------------------------------------------------


	# fitted quantiles(w) as a function of EY

	Flood_quantile <- function(EY, nu = 1, beta, q_star){
	q_star - beta*log(EY/nu)
	}


	# Estimate flow for EY = 0.01, approx. the 1% floow
	Flood_quantile(0.01, nu = 1, beta = styx_beta, q_star = styx_q_star)
	# [1] 796.8394


	# Estimate flows for all standard EY values
	# Standard EY


	res <- data_frame(
	EY = standard_EY,
	AEP = EY2AEP(standard_EY),
	AEP_1inX = 1/AEP,
	ARI = 1/standard_EY,
	Q = Flood_quantile(EY, nu = 1, beta = styx_beta, q_star = styx_q_star))

	res
	# # A tibble: 9 x 5
	# EY AEP AEP_1inX ARI Q
	# <dbl> <dbl> <dbl> <dbl> <dbl>
	# 1 1 0.632 1.58 1 68.1
	# 2 0.69 0.498 2.01 1.45 127.
	# 3 0.5 0.393 2.54 2 178.
	# 4 0.22 0.197 5.06 4.55 308.
	# 5 0.2 0.181 5.52 5 323.
	# 6 0.11 0.104 9.60 9.09 417.
	# 7 0.05 0.0488 20.5 20 542.
	# 8 0.02 0.0198 50.5 50 687.
	# 9 0.01 0.00995 101. 100 797.

	# # Write to clipboard
	# clip <- pipe("pbcopy", "w")
	# write.table(res, file=clip, sep = '\t', row.names = FALSE)
	# close(clip)




	# Function to boostrap flood quantiles to determine confidence intervalues

	Flood_quantile_boot <- function(pot_series, indices, EY, nu = 1){

	pot_series <- pot_series[indices]

	# L moments
	l1 <- mean(pot_series)
	l2 <- L2(pot_series)


	# Exponential parameters

	beta <- 2*l2 #158.240
	q_star <- l1 - beta #68.11748

	# Quantile
	q_star - beta*log(EY/nu)
	}


	# test
	Flood_quantile_boot(pot_series = styx, indices = 1:47, EY = 0.01)
	# [1] 796.8394 1% flood, as expected.



	Quantile_ci <- function(EY, pot_series){

	boot.out <- boot(data = pot_series,
	statistic = Flood_quantile_boot,
	R = 5000,
	EY = EY)

	ci <- boot.ci(boot.out, type = 'bca')

	out <- data_frame(EY = EY,
	est = ci$t0,
	lwr = ci$bca[4],
	upr = ci$bca[5])
	out

	}

	# Confidence limits

	# Test 1% CL
	Quantile_ci(EY = 0.01, pot_series = styx)

	# # A tibble: 1 x 4
	# EY est lwr upr
	# <dbl> <dbl> <dbl> <dbl>
	# 1 0.01 797. 626. 1129.


	# Confidence limits for the standard EY Values
	conf_limits_bootstrap <- map_df(.x = standard_EY,
	.f = Quantile_ci,
	pot_series = styx)

	conf_limits_bootstrap


	# # A tibble: 9 x 4
	# EY est lwr upr
	# <dbl> <dbl> <dbl> <dbl>
	# 1 1 68.1 34.7 90.3
	# 2 0.69 127. 106. 151.
	# 3 0.5 178. 150. 214.
	# 4 0.22 308. 254. 396.
	# 5 0.2 323. 266. 426.
	# 6 0.11 417. 338. 559.
	# 7 0.05 542. 434. 750.
	# 8 0.02 687. 547. 955.
	# 9 0.01 797. 628. 1132.



	# Plot --------------------------------------------------------------------



	styx_plot_data <- data_frame(Q = sort(styx, decreasing = TRUE), EY = Plotting_cunnane(sort(styx)))
	styx_plot_data

	# Transformation for secondary axis (showing ARI, inverse of EY)
	my_trans <- ~1/.


	p <- styx_plot_data %>%
	ggplot(aes(x = EY, y = Q)) +
	geom_point(size = 0.1) +
	scale_x_continuous(name = 'EY (Exceedances per year)',
	trans = 'log10',
	limits = c(0.01, 1),
	breaks = standard_EY, sec.axis = sec_axis(trans=my_trans, # Add a secondary axis showing ARI
	name='ARI (years)',
	breaks = c(100, 50, 20, 10, 5, 2, 1))) +
	scale_y_continuous(name = bquote('Peak flow ('m^3s^-1*')'),
	breaks = seq(0,1200,100),
	limits = c(NA, 1200)) +
	theme_gray(base_size = 7) +
	theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

	p

	# Calculate and add fitted values

	EY_seq <- seq(0.01, 1, length.out = 100)

	fit <- data_frame(EY_seq, flow = Flood_quantile (EY = EY_seq,
	beta = styx_beta,
	q_star = styx_q_star) )


	fit

	p <- p + geom_line(data = fit,aes(x = EY_seq, y = flow), size = 0.3)

	p
	# Calculate confidence intervals for 20 points along the xaxis

	conf_limits <- map_df(.x = seq(0.01, 1, length.out = 20),
	.f = Quantile_ci,
	pot_series = styx)

	# Add to graph
	p <- p +
	geom_line(data = conf_limits, aes(x = EY, y = lwr), linetype = 'dashed', size = 0.3) +
	geom_line(data = conf_limits, aes(x = EY, y = upr), linetype = 'dashed', size = 0.3)


	# plot
	p


	ggsave(file.path('../figures', 'fit_plot.png'), p, width = 4, height = 3)


	# Alternative version, increasing to the right



	styx_plot_data <- data_frame(Q = sort(styx, decreasing = TRUE), ARI =1/ Plotting_cunnane(sort(styx)))

	styx_plot_data

	# transformation for secondary axis
	my_trans <- ~1/.


	p <- styx_plot_data %>%
	ggplot(aes(x = ARI, y = Q)) +
	geom_point(size = 0.1) +
	scale_x_continuous(name = 'ARI (years)',
	trans = 'log10',
	limits = c(1,100),
	breaks = c(1,2,5,10,20,50,100),
	sec.axis = sec_axis(trans=my_trans, # Add a secondary axis showing ARI
	name='EY',
	breaks = standard_EY)) +
	scale_y_continuous(name = bquote('Peak flow ('m^3s^-1*')'),
	breaks = seq(0,1200,100),
	limits = c(NA, 1200)) +
	theme_gray(base_size = 7) +
	theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

	p

	# Calculate and add fitted values

	EY_seq <- seq(0.01, 1, length.out = 100)
	ARI_seq = 1/EY_seq


	fit <- data_frame(ARI_seq, flow = Flood_quantile (EY = EY_seq,
	beta = styx_beta,
	q_star = styx_q_star) )


	fit

	p <- p + geom_line(data = fit, aes(x = ARI_seq, y = flow), size = 0.3)

	p
	# Calculate confidence intervals for 20 points along the xaxis
	# These take a while to calculate using bootstrapping so I'm limiting the number of points

	conf_limits <- map_df(.x = seq(0.01, 1, length.out = 20),
	.f = Quantile_ci,
	pot_series = styx)

	# Add to graph
	p <- p +
	geom_line(data = conf_limits, aes(x = 1/EY, y = lwr), linetype = 'dashed', size = 0.3) +
	geom_line(data = conf_limits, aes(x = 1/EY, y = upr), linetype = 'dashed', size = 0.3)


	# plot
	p


	ggsave(file.path('../figures', 'fit_plot_ARI.png'), p, width = 4, height = 3)