Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active March 12, 2019 10:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/c8f1cda4d334291402d857b1ce1320b8 to your computer and use it in GitHub Desktop.
Save TonyLadson/c8f1cda4d334291402d857b1ce1320b8 to your computer and use it in GitHub Desktop.
Time of concentration, analysis of data collected by Pilgrim and McDermott. See https://wordpress.com/post/tonyladson.wordpress.com/10599
library(ggmap)
library(hydroGOF)
library(tidyverse)
library(readxl)
library(ggrepel)
# Analysis of time of concentration data
# Source: Appendix F of McDarmott, G. E. and Pilgrim, D. H. (1982) Design flood estimation for small catchments in NSW
# Department of National Development and Energy
# Australian Water Resources Council
# Research Project No. 78/104
# Technical Paper No. 73
# Australian Government Publishing Service
# Canberra, 1982
# Authors are at the School of Civil Engineering, The University of New South Wales
# Functions ---------------------------------------------------------------
# function to plot logarithmic minor breaks
# from https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot
log_breaks = function(maj, radix=10) {
function(x) {
minx = floor(min(logb(x,radix), na.rm=T)) - 1
maxx = ceiling(max(logb(x,radix), na.rm=T)) + 1
n_major = maxx - minx + 1
major_breaks = seq(minx, maxx, by=1)
if (maj) {
breaks = major_breaks
} else {
steps = logb(1:(radix-1),radix)
breaks = rep(steps, times=n_major) +
rep(major_breaks, each=radix-1)
}
radix^breaks
}
}
scale_x_log_eng = function(..., radix=10) {
scale_x_continuous(...,
trans=scales::log_trans(radix),
breaks=log_breaks(TRUE, radix),
minor_breaks=log_breaks(FALSE, radix))
}
scale_y_log_eng = function(..., radix=10) {
scale_y_continuous(...,
trans=scales::log_trans(radix),
breaks=log_breaks(TRUE, radix),
minor_breaks=log_breaks(FALSE, radix))
}
# Load and munge data -----------------------------------------------------
# Load data from google drive
id <- "1haWTujT0XazONED5ZwH9O0yRcuet6QSc"
response <- read_csv(file = sprintf("https://docs.google.com/uc?id=%s&export=download", id),
skip = 34,
col_names = FALSE)
names(response) <- c('gauge_num', 'station_name', 'waterway_name',
'latitude', 'longitude', 'authority',
'time_hrs', 'source', 'area_alt', 'area',
'stream_length', 'slope_e', 'slope_e_error',
'slope_a', 'slope_a_error')
# gauge_num = gauge number
# station_name = station name
# waterway_name = waterway name
# latitude = latitude
# longitude = longitude
# authority = authority
# time_hrs = catchment response time in hours
# source = source of data (see data file)
# area_alt = catchment area obtained from Bureau of Meteorlogy
# either from the Water Resources Station Catalogue or Water Data Online (sq-km)
# area = area from the original source (se_km)
# slope_e = equal area slope
# slop_e_error = some data in the original document had E after the numerical value.
# I've put the E in a separate column (and assumed it meant error)
# slope_a = main stream average slope
# slope_a_error = some data in the original document had E after the numerical value.
# I've put the E in a separate column (and assumed it meant error)
glimpse(response)
# Map gauges --------------------------------------------------------------
# You need an API key for this to work see ?register_google
aust_map <- ggmap(get_map(location = c(lon =147 , lat = -29), zoom = 5), extent = 'device')
# Centre on the half way up the East Coast
aust_map
# Add points were we have the data
p <- aust_map +
geom_point(aes(x = longitude, y = latitude), colour = 'red', size = 0.025, data = response) +
theme_minimal(base_size=5) +
labs(x = NULL, y = NULL)
p
# Save for webpage
ggsave(file.path('../figures', 'gauge_map.png'), p, width = 4, height = 4)
# Response time as a function of area -------------------------------------
response %>%
summarise(min(area), max(area), min(time_hrs), max(time_hrs))
# # A tibble: 1 x 4
# `min(area)` `max(area)` `min(time_hrs)` `max(time_hrs)`
# <dbl> <dbl> <dbl> <dbl>
# 1 0.04 1092 0.15 16.4
m_area <- lm(log10(time_hrs) ~ log10(area), data = response)
# Coefficients:
# (Intercept) log10(area)
# -0.1215 0.3830
summary(m_area)
# Call:
# lm(formula = log10(time_hrs) ~ log10(area), data = response)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.54083 -0.09732 0.01513 0.13669 0.36459
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) -0.12150 0.03000 -4.049 0.000105 ***
# log10(area) 0.38298 0.01657 23.119 < 2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1878 on 94 degrees of freedom
# Multiple R-squared: 0.8504, Adjusted R-squared: 0.8488
# F-statistic: 534.5 on 1 and 94 DF, p-value: < 2.2e-16
# Intercept
10^coef(m_area)[1]
#0.7560973
# Power
coef(m_area)[2]
# log10(area)
# 0.3828074
# So the fitted equation is the same as Pilgrim and McDermott
# 0.76A^0.38
p <- response %>% ggplot(aes(x = area, y = time_hrs)) +
geom_point(size = 0.1) +
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
labels = scales::comma) +
scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.01, 30)) +
geom_smooth(method = lm, se = FALSE, size = 0.3)
p
# Add prediction intervals
area_seq <- data_frame(area = seq(0.05, 1200, length.out = 1000)) # sequence for plotting
predict_df <- as.data.frame(predict.lm(m_area,
newdata = area_seq,
interval = 'prediction')) %>%
bind_cols(area_seq) %>%
mutate(fit = 10^fit, # transform so points plot correctly
lwr = 10^lwr,
upr = 10^upr)
# Add upper and lower prediction intervals to plot
p <- p +
geom_line(data = predict_df, aes(x = area, y = lwr),
colour = 'black',
size = 0.1,
linetype = 'dashed') +
geom_line(data = predict_df, aes(x = area, y = upr),
colour = 'black',
size = 0.1,
linetype = 'dashed') +
theme_grey(base_size = 7)
p
ggsave(file.path('../figures', 'toc_area.png'), p, width = 4, height = 3)
# Prediction interval for a 100 km-sq catchment
10^predict.lm(m_area, newdata = data_frame(area = 100), interval = 'prediction')
# fit lwr upr
# 4.407502 1.848486 10.50918
# plot predicted and actual characteristic time
# Natural scale
response %>%
mutate(PM_predict = 10^predict(m_area)) %>%
ggplot(aes(x = PM_predict, y = time_hrs)) +
geom_point() +
geom_smooth( se = FALSE, linetype = 'dashed') +
geom_abline(slope = 1, intercept = 0) +
scale_x_continuous(name = 'Predicted time (h)') +
scale_y_continuous(name = 'Observed time (h)')
# Log scale
response %>%
mutate(PM_predict = 10^predict(m_area)) %>%
ggplot(aes(x = PM_predict, y = time_hrs)) +
geom_point() +
geom_smooth( se = FALSE, linetype = 'dashed') +
geom_abline(slope = 1, intercept = 0) +
scale_x_log_eng(name = 'Pilgrim McDermott Predicted time (h)') +
scale_y_log_eng(name = 'Observed time (h)') +
annotate(geom = 'text', x = 10, y = 0.3, label = 'data', hjust = 'left') +
annotate(geom = 'segment', x = 7, xend = 9,
y = 0.3, yend = 0.3,
colour = 'blue',
linetype = 'dashed') +
annotate(geom = 'text', x = 10, y = 0.4, label = '1:1', hjust = 'left') +
annotate(geom = 'segment', x = 7, xend = 9,
y = 0.4, yend = 0.4,
colour = 'black')
# Nash-Sutcliffe coefficient of efficiency
response %>%
mutate(PM_predict = 10^predict(m_area)) %>%
summarise(nse = hydroGOF::NSE(PM_predict, time_hrs))
# # A tibble: 1 x 1
# nse
# <dbl>
# 1 0.696
# Calculating NSE in the log domain
response %>%
mutate(PM_predict = 10^predict(m_area)) %>%
summarise(nse = hydroGOF::NSE(PM_predict, time_hrs, FUN = log10))
# 0.848
# Catchment response as a function of slope -------------------------------
response %>%
summarise(min(slope_e), max(slope_e), min(time_hrs), max(time_hrs))
# `min(slope_e)` `max(slope_e)` `min(time_hrs)` `max(time_hrs)`
# <dbl> <dbl> <dbl> <dbl>
# 1 1.38 201 0.15 16.4
p <- response %>% ggplot(aes(x = slope_e, y = time_hrs)) +
geom_point(size = 0.1) +
scale_x_log_eng(name = 'Slope (m/km)', #limits = c(1, 150)
labels = scales::comma) +
scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.1, 22)) +
geom_smooth(method = lm, se = FALSE, size = 0.3) +
theme_grey(base_size = 7)
p
ggsave(file.path('../figures', 'toc_slope.png'), p, width = 4, height = 3)
m_slope <- lm(log(time_hrs) ~ log(slope_e), data = response)
summary(m_slope)
# Call:
# lm(formula = log(time_hrs) ~ log(slope_e), data = response)
#
# Residuals:
# Min 1Q Median 3Q Max
# -1.6480 -0.5351 0.1033 0.5338 1.4076
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 3.24165 0.19175 16.91 <2e-16 ***
# log(slope_e) -0.76101 0.05941 -12.81 <2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.6747 on 94 degrees of freedom
# Multiple R-squared: 0.6358, Adjusted R-squared: 0.6319
# F-statistic: 164.1 on 1 and 94 DF, p-value: < 2.2e-16
# Response as a function of stream_length ---------------------------------
response %>%
summarise(min(stream_length), max(stream_length))
# # A tibble: 1 x 2
# `min(stream_length)` `max(stream_length)`
# <dbl> <dbl>
# 1 0.33 123
# >
p <- response %>%
ggplot(aes(x = stream_length, y = time_hrs)) +
geom_point(size = 0.1) +
scale_x_continuous(name = 'Stream length (km)') +
scale_y_continuous(name = 'Catchment response time (hours)') +
geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
theme_gray(base_size = 7)
ggsave(file.path('../figures', 'toc_stream_length.png'), p, width = 4, height = 3)
m_stream_length <- lm(time_hrs ~ stream_length, data = response)
summary(m_stream_length)
# Relationship between area and stream_length -----------------------------
p <- response %>%
ggplot(aes(x = area, y = stream_length)) +
geom_point(size = 0.1) +
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
labels = scales::comma) +
scale_y_log_eng(name = 'Stream length (km)',
labels = scales::comma) +
geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
theme_gray(base_size = 7)
ggsave(file.path('../figures', 'area_stream_length.png'), p, width = 4, height = 3)
m_area_stream_length <- lm(log10(stream_length) ~ log10(area), data = response)
summary(m_area_stream_length)
# Call:
# lm(formula = log10(stream_length) ~ log10(area), data = response)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.233840 -0.055750 0.003031 0.060985 0.220033
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.233183 0.014462 16.12 <2e-16 ***
# log10(area) 0.566083 0.008041 70.40 <2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.09047 on 93 degrees of freedom
# Multiple R-squared: 0.9816, Adjusted R-squared: 0.9814
# F-statistic: 4956 on 1 and 93 DF, p-value: < 2.2e-16
# Log scale
response %>%
ggplot(aes(x = area, y = stream_length)) +
geom_point() +
scale_x_continuous(name = bquote('Area ('*km^2*')'),
limits = c(0.01, 2000),
trans = 'sqrt',
labels = scales::comma) +
scale_y_continuous(name = 'Stream length (km)',
labels = scales::comma) +
geom_smooth()
# Relationship between area and slope -------------------------------------
response %>%
ggplot(aes(x = area, y = slope_e)) +
geom_point() +
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
labels = scales::comma) +
scale_y_log_eng(name = 'Slope', limits = c(1, 220),
labels = scales::comma) +
geom_smooth()
# Web version
p <- response %>%
ggplot(aes(x = area, y = slope_e)) +
geom_point(size = 0.1) +
scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
labels = scales::comma) +
scale_y_log_eng(name = 'Slope (m/km)', limits = c(1, 220),
labels = scales::comma) +
geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
theme_gray(base_size = 7)
ggsave(file.path('../figures', 'area_slope.png'), p, width = 4, height = 3)
m_area_slope <- lm(log(slope_e) ~ log(area), data = response)
summary(m_area_slope)
# Call:
# lm(formula = log(slope_e) ~ log(area), data = response)
#
# Residuals:
# Min 1Q Median 3Q Max
# -1.92470 -0.40829 0.03406 0.44907 1.36601
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 4.15078 0.10837 38.30 <2e-16 ***
# log(area) -0.35477 0.02599 -13.65 <2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.6782 on 94 degrees of freedom
# Multiple R-squared: 0.6647, Adjusted R-squared: 0.6612
# F-statistic: 186.4 on 1 and 94 DF, p-value: < 2.2e-16
# Slope decreases as catchmen area increases
# Catchment response as a function of slope and area -------------------------------
m_slope_area <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area), data = response)
summary(m_slope_area)
# Call:
# lm(formula = log10(time_hrs) ~ log10(slope_e) + log10(area),
# data = response)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.51656 -0.07466 0.02242 0.12641 0.35864
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.11195 0.12035 0.930 0.3547
# log10(slope_e) -0.12950 0.06472 -2.001 0.0483 *
# log10(area) 0.33704 0.02816 11.968 <2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1848 on 93 degrees of freedom
# Multiple R-squared: 0.8566, Adjusted R-squared: 0.8535
# F-statistic: 277.8 on 2 and 93 DF, p-value: < 2.2e-16
# Catchment response as a function area + slope + stream length --------------------------------------------
m_slope_area_stream_length <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area) + stream_length, data = response)
summary(m_slope_area_stream_length)
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.1452158 0.1298496 1.118 0.2664
# log10(slope_e) -0.1482061 0.0702580 -2.109 0.0377 *
# log10(area) 0.3457835 0.0308687 11.202 <2e-16 ***
# stream_length -0.0008966 0.0012429 -0.721 0.4725
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1863 on 91 degrees of freedom
# Multiple R-squared: 0.8555, Adjusted R-squared: 0.8507
# F-statistic: 179.6 on 3 and 91 DF, p-value: < 2.2e-16
# No value in including stream length
# Best model --------------------------------------------------------------
# McDermott and Pilgrim decided to use a model just based on area, rather than slope and area
# Lets see if that is justified
anova(m_slope_area, m_area)
# Analysis of Variance Table
#
# Model 1: log10(time_hrs) ~ log10(slope_e) + log10(area)
# Model 2: log10(time_hrs) ~ log10(area)
# Res.Df RSS Df Sum of Sq F Pr(>F)
# 1 93 3.1771
# 2 94 3.3139 -1 -0.13677 4.0034 0.04832 *
# Some value in including area but not much
AIC(m_slope)
# 200.9
AIC(m_area)
# -44.7
AIC(m_slope_area)
# -46.8
BIC(m_slope)
# 208.5
BIC(m_area)
# -37.0
BIC(m_slope_area)
# -36.5
# So leaving slope out is reasonable.
# AIC suggests including slope and area
# BIC suggests just area
# The adjusted R-squared is
# 0.854 with slope and area
# 0.849 with just slope
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment