Last active
March 12, 2019 10:52
-
-
Save TonyLadson/c8f1cda4d334291402d857b1ce1320b8 to your computer and use it in GitHub Desktop.
Time of concentration, analysis of data collected by Pilgrim and McDermott. See https://wordpress.com/post/tonyladson.wordpress.com/10599
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggmap) | |
library(hydroGOF) | |
library(tidyverse) | |
library(readxl) | |
library(ggrepel) | |
# Analysis of time of concentration data | |
# Source: Appendix F of McDarmott, G. E. and Pilgrim, D. H. (1982) Design flood estimation for small catchments in NSW | |
# Department of National Development and Energy | |
# Australian Water Resources Council | |
# Research Project No. 78/104 | |
# Technical Paper No. 73 | |
# Australian Government Publishing Service | |
# Canberra, 1982 | |
# Authors are at the School of Civil Engineering, The University of New South Wales | |
# Functions --------------------------------------------------------------- | |
# function to plot logarithmic minor breaks | |
# from https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot | |
log_breaks = function(maj, radix=10) { | |
function(x) { | |
minx = floor(min(logb(x,radix), na.rm=T)) - 1 | |
maxx = ceiling(max(logb(x,radix), na.rm=T)) + 1 | |
n_major = maxx - minx + 1 | |
major_breaks = seq(minx, maxx, by=1) | |
if (maj) { | |
breaks = major_breaks | |
} else { | |
steps = logb(1:(radix-1),radix) | |
breaks = rep(steps, times=n_major) + | |
rep(major_breaks, each=radix-1) | |
} | |
radix^breaks | |
} | |
} | |
scale_x_log_eng = function(..., radix=10) { | |
scale_x_continuous(..., | |
trans=scales::log_trans(radix), | |
breaks=log_breaks(TRUE, radix), | |
minor_breaks=log_breaks(FALSE, radix)) | |
} | |
scale_y_log_eng = function(..., radix=10) { | |
scale_y_continuous(..., | |
trans=scales::log_trans(radix), | |
breaks=log_breaks(TRUE, radix), | |
minor_breaks=log_breaks(FALSE, radix)) | |
} | |
# Load and munge data ----------------------------------------------------- | |
# Load data from google drive | |
id <- "1haWTujT0XazONED5ZwH9O0yRcuet6QSc" | |
response <- read_csv(file = sprintf("https://docs.google.com/uc?id=%s&export=download", id), | |
skip = 34, | |
col_names = FALSE) | |
names(response) <- c('gauge_num', 'station_name', 'waterway_name', | |
'latitude', 'longitude', 'authority', | |
'time_hrs', 'source', 'area_alt', 'area', | |
'stream_length', 'slope_e', 'slope_e_error', | |
'slope_a', 'slope_a_error') | |
# gauge_num = gauge number | |
# station_name = station name | |
# waterway_name = waterway name | |
# latitude = latitude | |
# longitude = longitude | |
# authority = authority | |
# time_hrs = catchment response time in hours | |
# source = source of data (see data file) | |
# area_alt = catchment area obtained from Bureau of Meteorlogy | |
# either from the Water Resources Station Catalogue or Water Data Online (sq-km) | |
# area = area from the original source (se_km) | |
# slope_e = equal area slope | |
# slop_e_error = some data in the original document had E after the numerical value. | |
# I've put the E in a separate column (and assumed it meant error) | |
# slope_a = main stream average slope | |
# slope_a_error = some data in the original document had E after the numerical value. | |
# I've put the E in a separate column (and assumed it meant error) | |
glimpse(response) | |
# Map gauges -------------------------------------------------------------- | |
# You need an API key for this to work see ?register_google | |
aust_map <- ggmap(get_map(location = c(lon =147 , lat = -29), zoom = 5), extent = 'device') | |
# Centre on the half way up the East Coast | |
aust_map | |
# Add points were we have the data | |
p <- aust_map + | |
geom_point(aes(x = longitude, y = latitude), colour = 'red', size = 0.025, data = response) + | |
theme_minimal(base_size=5) + | |
labs(x = NULL, y = NULL) | |
p | |
# Save for webpage | |
ggsave(file.path('../figures', 'gauge_map.png'), p, width = 4, height = 4) | |
# Response time as a function of area ------------------------------------- | |
response %>% | |
summarise(min(area), max(area), min(time_hrs), max(time_hrs)) | |
# # A tibble: 1 x 4 | |
# `min(area)` `max(area)` `min(time_hrs)` `max(time_hrs)` | |
# <dbl> <dbl> <dbl> <dbl> | |
# 1 0.04 1092 0.15 16.4 | |
m_area <- lm(log10(time_hrs) ~ log10(area), data = response) | |
# Coefficients: | |
# (Intercept) log10(area) | |
# -0.1215 0.3830 | |
summary(m_area) | |
# Call: | |
# lm(formula = log10(time_hrs) ~ log10(area), data = response) | |
# | |
# Residuals: | |
# Min 1Q Median 3Q Max | |
# -0.54083 -0.09732 0.01513 0.13669 0.36459 | |
# | |
# Coefficients: | |
# Estimate Std. Error t value Pr(>|t|) | |
# (Intercept) -0.12150 0.03000 -4.049 0.000105 *** | |
# log10(area) 0.38298 0.01657 23.119 < 2e-16 *** | |
# --- | |
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
# | |
# Residual standard error: 0.1878 on 94 degrees of freedom | |
# Multiple R-squared: 0.8504, Adjusted R-squared: 0.8488 | |
# F-statistic: 534.5 on 1 and 94 DF, p-value: < 2.2e-16 | |
# Intercept | |
10^coef(m_area)[1] | |
#0.7560973 | |
# Power | |
coef(m_area)[2] | |
# log10(area) | |
# 0.3828074 | |
# So the fitted equation is the same as Pilgrim and McDermott | |
# 0.76A^0.38 | |
p <- response %>% ggplot(aes(x = area, y = time_hrs)) + | |
geom_point(size = 0.1) + | |
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000), | |
labels = scales::comma) + | |
scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.01, 30)) + | |
geom_smooth(method = lm, se = FALSE, size = 0.3) | |
p | |
# Add prediction intervals | |
area_seq <- data_frame(area = seq(0.05, 1200, length.out = 1000)) # sequence for plotting | |
predict_df <- as.data.frame(predict.lm(m_area, | |
newdata = area_seq, | |
interval = 'prediction')) %>% | |
bind_cols(area_seq) %>% | |
mutate(fit = 10^fit, # transform so points plot correctly | |
lwr = 10^lwr, | |
upr = 10^upr) | |
# Add upper and lower prediction intervals to plot | |
p <- p + | |
geom_line(data = predict_df, aes(x = area, y = lwr), | |
colour = 'black', | |
size = 0.1, | |
linetype = 'dashed') + | |
geom_line(data = predict_df, aes(x = area, y = upr), | |
colour = 'black', | |
size = 0.1, | |
linetype = 'dashed') + | |
theme_grey(base_size = 7) | |
p | |
ggsave(file.path('../figures', 'toc_area.png'), p, width = 4, height = 3) | |
# Prediction interval for a 100 km-sq catchment | |
10^predict.lm(m_area, newdata = data_frame(area = 100), interval = 'prediction') | |
# fit lwr upr | |
# 4.407502 1.848486 10.50918 | |
# plot predicted and actual characteristic time | |
# Natural scale | |
response %>% | |
mutate(PM_predict = 10^predict(m_area)) %>% | |
ggplot(aes(x = PM_predict, y = time_hrs)) + | |
geom_point() + | |
geom_smooth( se = FALSE, linetype = 'dashed') + | |
geom_abline(slope = 1, intercept = 0) + | |
scale_x_continuous(name = 'Predicted time (h)') + | |
scale_y_continuous(name = 'Observed time (h)') | |
# Log scale | |
response %>% | |
mutate(PM_predict = 10^predict(m_area)) %>% | |
ggplot(aes(x = PM_predict, y = time_hrs)) + | |
geom_point() + | |
geom_smooth( se = FALSE, linetype = 'dashed') + | |
geom_abline(slope = 1, intercept = 0) + | |
scale_x_log_eng(name = 'Pilgrim McDermott Predicted time (h)') + | |
scale_y_log_eng(name = 'Observed time (h)') + | |
annotate(geom = 'text', x = 10, y = 0.3, label = 'data', hjust = 'left') + | |
annotate(geom = 'segment', x = 7, xend = 9, | |
y = 0.3, yend = 0.3, | |
colour = 'blue', | |
linetype = 'dashed') + | |
annotate(geom = 'text', x = 10, y = 0.4, label = '1:1', hjust = 'left') + | |
annotate(geom = 'segment', x = 7, xend = 9, | |
y = 0.4, yend = 0.4, | |
colour = 'black') | |
# Nash-Sutcliffe coefficient of efficiency | |
response %>% | |
mutate(PM_predict = 10^predict(m_area)) %>% | |
summarise(nse = hydroGOF::NSE(PM_predict, time_hrs)) | |
# # A tibble: 1 x 1 | |
# nse | |
# <dbl> | |
# 1 0.696 | |
# Calculating NSE in the log domain | |
response %>% | |
mutate(PM_predict = 10^predict(m_area)) %>% | |
summarise(nse = hydroGOF::NSE(PM_predict, time_hrs, FUN = log10)) | |
# 0.848 | |
# Catchment response as a function of slope ------------------------------- | |
response %>% | |
summarise(min(slope_e), max(slope_e), min(time_hrs), max(time_hrs)) | |
# `min(slope_e)` `max(slope_e)` `min(time_hrs)` `max(time_hrs)` | |
# <dbl> <dbl> <dbl> <dbl> | |
# 1 1.38 201 0.15 16.4 | |
p <- response %>% ggplot(aes(x = slope_e, y = time_hrs)) + | |
geom_point(size = 0.1) + | |
scale_x_log_eng(name = 'Slope (m/km)', #limits = c(1, 150) | |
labels = scales::comma) + | |
scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.1, 22)) + | |
geom_smooth(method = lm, se = FALSE, size = 0.3) + | |
theme_grey(base_size = 7) | |
p | |
ggsave(file.path('../figures', 'toc_slope.png'), p, width = 4, height = 3) | |
m_slope <- lm(log(time_hrs) ~ log(slope_e), data = response) | |
summary(m_slope) | |
# Call: | |
# lm(formula = log(time_hrs) ~ log(slope_e), data = response) | |
# | |
# Residuals: | |
# Min 1Q Median 3Q Max | |
# -1.6480 -0.5351 0.1033 0.5338 1.4076 | |
# | |
# Coefficients: | |
# Estimate Std. Error t value Pr(>|t|) | |
# (Intercept) 3.24165 0.19175 16.91 <2e-16 *** | |
# log(slope_e) -0.76101 0.05941 -12.81 <2e-16 *** | |
# --- | |
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
# | |
# Residual standard error: 0.6747 on 94 degrees of freedom | |
# Multiple R-squared: 0.6358, Adjusted R-squared: 0.6319 | |
# F-statistic: 164.1 on 1 and 94 DF, p-value: < 2.2e-16 | |
# Response as a function of stream_length --------------------------------- | |
response %>% | |
summarise(min(stream_length), max(stream_length)) | |
# # A tibble: 1 x 2 | |
# `min(stream_length)` `max(stream_length)` | |
# <dbl> <dbl> | |
# 1 0.33 123 | |
# > | |
p <- response %>% | |
ggplot(aes(x = stream_length, y = time_hrs)) + | |
geom_point(size = 0.1) + | |
scale_x_continuous(name = 'Stream length (km)') + | |
scale_y_continuous(name = 'Catchment response time (hours)') + | |
geom_smooth(method = 'lm', se = FALSE, size = 0.3) + | |
theme_gray(base_size = 7) | |
ggsave(file.path('../figures', 'toc_stream_length.png'), p, width = 4, height = 3) | |
m_stream_length <- lm(time_hrs ~ stream_length, data = response) | |
summary(m_stream_length) | |
# Relationship between area and stream_length ----------------------------- | |
p <- response %>% | |
ggplot(aes(x = area, y = stream_length)) + | |
geom_point(size = 0.1) + | |
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000), | |
labels = scales::comma) + | |
scale_y_log_eng(name = 'Stream length (km)', | |
labels = scales::comma) + | |
geom_smooth(method = 'lm', se = FALSE, size = 0.3) + | |
theme_gray(base_size = 7) | |
ggsave(file.path('../figures', 'area_stream_length.png'), p, width = 4, height = 3) | |
m_area_stream_length <- lm(log10(stream_length) ~ log10(area), data = response) | |
summary(m_area_stream_length) | |
# Call: | |
# lm(formula = log10(stream_length) ~ log10(area), data = response) | |
# | |
# Residuals: | |
# Min 1Q Median 3Q Max | |
# -0.233840 -0.055750 0.003031 0.060985 0.220033 | |
# | |
# Coefficients: | |
# Estimate Std. Error t value Pr(>|t|) | |
# (Intercept) 0.233183 0.014462 16.12 <2e-16 *** | |
# log10(area) 0.566083 0.008041 70.40 <2e-16 *** | |
# --- | |
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
# | |
# Residual standard error: 0.09047 on 93 degrees of freedom | |
# Multiple R-squared: 0.9816, Adjusted R-squared: 0.9814 | |
# F-statistic: 4956 on 1 and 93 DF, p-value: < 2.2e-16 | |
# Log scale | |
response %>% | |
ggplot(aes(x = area, y = stream_length)) + | |
geom_point() + | |
scale_x_continuous(name = bquote('Area ('*km^2*')'), | |
limits = c(0.01, 2000), | |
trans = 'sqrt', | |
labels = scales::comma) + | |
scale_y_continuous(name = 'Stream length (km)', | |
labels = scales::comma) + | |
geom_smooth() | |
# Relationship between area and slope ------------------------------------- | |
response %>% | |
ggplot(aes(x = area, y = slope_e)) + | |
geom_point() + | |
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000), | |
labels = scales::comma) + | |
scale_y_log_eng(name = 'Slope', limits = c(1, 220), | |
labels = scales::comma) + | |
geom_smooth() | |
# Web version | |
p <- response %>% | |
ggplot(aes(x = area, y = slope_e)) + | |
geom_point(size = 0.1) + | |
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000), | |
labels = scales::comma) + | |
scale_y_log_eng(name = 'Slope (m/km)', limits = c(1, 220), | |
labels = scales::comma) + | |
geom_smooth(method = 'lm', se = FALSE, size = 0.3) + | |
theme_gray(base_size = 7) | |
ggsave(file.path('../figures', 'area_slope.png'), p, width = 4, height = 3) | |
m_area_slope <- lm(log(slope_e) ~ log(area), data = response) | |
summary(m_area_slope) | |
# Call: | |
# lm(formula = log(slope_e) ~ log(area), data = response) | |
# | |
# Residuals: | |
# Min 1Q Median 3Q Max | |
# -1.92470 -0.40829 0.03406 0.44907 1.36601 | |
# | |
# Coefficients: | |
# Estimate Std. Error t value Pr(>|t|) | |
# (Intercept) 4.15078 0.10837 38.30 <2e-16 *** | |
# log(area) -0.35477 0.02599 -13.65 <2e-16 *** | |
# --- | |
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
# | |
# Residual standard error: 0.6782 on 94 degrees of freedom | |
# Multiple R-squared: 0.6647, Adjusted R-squared: 0.6612 | |
# F-statistic: 186.4 on 1 and 94 DF, p-value: < 2.2e-16 | |
# Slope decreases as catchmen area increases | |
# Catchment response as a function of slope and area ------------------------------- | |
m_slope_area <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area), data = response) | |
summary(m_slope_area) | |
# Call: | |
# lm(formula = log10(time_hrs) ~ log10(slope_e) + log10(area), | |
# data = response) | |
# | |
# Residuals: | |
# Min 1Q Median 3Q Max | |
# -0.51656 -0.07466 0.02242 0.12641 0.35864 | |
# | |
# Coefficients: | |
# Estimate Std. Error t value Pr(>|t|) | |
# (Intercept) 0.11195 0.12035 0.930 0.3547 | |
# log10(slope_e) -0.12950 0.06472 -2.001 0.0483 * | |
# log10(area) 0.33704 0.02816 11.968 <2e-16 *** | |
# --- | |
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
# | |
# Residual standard error: 0.1848 on 93 degrees of freedom | |
# Multiple R-squared: 0.8566, Adjusted R-squared: 0.8535 | |
# F-statistic: 277.8 on 2 and 93 DF, p-value: < 2.2e-16 | |
# Catchment response as a function area + slope + stream length -------------------------------------------- | |
m_slope_area_stream_length <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area) + stream_length, data = response) | |
summary(m_slope_area_stream_length) | |
# Coefficients: | |
# Estimate Std. Error t value Pr(>|t|) | |
# (Intercept) 0.1452158 0.1298496 1.118 0.2664 | |
# log10(slope_e) -0.1482061 0.0702580 -2.109 0.0377 * | |
# log10(area) 0.3457835 0.0308687 11.202 <2e-16 *** | |
# stream_length -0.0008966 0.0012429 -0.721 0.4725 | |
# --- | |
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
# | |
# Residual standard error: 0.1863 on 91 degrees of freedom | |
# Multiple R-squared: 0.8555, Adjusted R-squared: 0.8507 | |
# F-statistic: 179.6 on 3 and 91 DF, p-value: < 2.2e-16 | |
# No value in including stream length | |
# Best model -------------------------------------------------------------- | |
# McDermott and Pilgrim decided to use a model just based on area, rather than slope and area | |
# Lets see if that is justified | |
anova(m_slope_area, m_area) | |
# Analysis of Variance Table | |
# | |
# Model 1: log10(time_hrs) ~ log10(slope_e) + log10(area) | |
# Model 2: log10(time_hrs) ~ log10(area) | |
# Res.Df RSS Df Sum of Sq F Pr(>F) | |
# 1 93 3.1771 | |
# 2 94 3.3139 -1 -0.13677 4.0034 0.04832 * | |
# Some value in including area but not much | |
AIC(m_slope) | |
# 200.9 | |
AIC(m_area) | |
# -44.7 | |
AIC(m_slope_area) | |
# -46.8 | |
BIC(m_slope) | |
# 208.5 | |
BIC(m_area) | |
# -37.0 | |
BIC(m_slope_area) | |
# -36.5 | |
# So leaving slope out is reasonable. | |
# AIC suggests including slope and area | |
# BIC suggests just area | |
# The adjusted R-squared is | |
# 0.854 with slope and area | |
# 0.849 with just slope |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment