TonyLadson/Catchment_response.R

## Catchment_response.R
library(ggmap)
library(hydroGOF)
library(tidyverse)
library(readxl)
library(ggrepel)


# Analysis of time of concentration data
# Source: Appendix F of McDarmott, G. E. and Pilgrim, D. H. (1982) Design flood estimation for small catchments in NSW
# Department of National Development and Energy
# Australian Water Resources Council
# Research Project No. 78/104
# Technical Paper No. 73
# Australian Government Publishing Service
# Canberra, 1982
# Authors are at the School of Civil Engineering, The University of New South Wales


# Functions ---------------------------------------------------------------

# function to plot logarithmic minor breaks
# from https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot


log_breaks = function(maj, radix=10) {
  function(x) {
    minx         = floor(min(logb(x,radix), na.rm=T)) - 1
    maxx         = ceiling(max(logb(x,radix), na.rm=T)) + 1
    n_major      = maxx - minx + 1
    major_breaks = seq(minx, maxx, by=1)
    if (maj) {
      breaks = major_breaks
    } else {
      steps = logb(1:(radix-1),radix)
      breaks = rep(steps, times=n_major) +
        rep(major_breaks, each=radix-1)
    }
    radix^breaks
  }


}
scale_x_log_eng = function(..., radix=10) {
  scale_x_continuous(...,
                     trans=scales::log_trans(radix),
                     breaks=log_breaks(TRUE, radix),
                     minor_breaks=log_breaks(FALSE, radix))
}
scale_y_log_eng = function(..., radix=10) {
  scale_y_continuous(...,
                     trans=scales::log_trans(radix),
                     breaks=log_breaks(TRUE, radix),
                     minor_breaks=log_breaks(FALSE, radix))
}


# Load and munge data -----------------------------------------------------


# Load data from google drive

id <- "1haWTujT0XazONED5ZwH9O0yRcuet6QSc"
response <- read_csv(file = sprintf("https://docs.google.com/uc?id=%s&export=download", id),
                  skip = 34,
                  col_names = FALSE)


names(response) <- c('gauge_num', 'station_name', 'waterway_name',
                     'latitude', 'longitude', 'authority',
                     'time_hrs', 'source', 'area_alt', 'area',
                     'stream_length', 'slope_e', 'slope_e_error',
                     'slope_a', 'slope_a_error')

# gauge_num = gauge number
# station_name = station name
# waterway_name = waterway name
# latitude = latitude
# longitude = longitude
# authority = authority
# time_hrs = catchment response time in hours
# source = source of data (see data file)
# area_alt = catchment area obtained from Bureau of Meteorlogy
#            either from the Water Resources Station Catalogue or Water Data Online (sq-km)
# area = area from the original source (se_km)
# slope_e  = equal area slope
# slop_e_error = some data in the original document had E after the numerical value.
#                I've put the E in a separate column (and assumed it meant error)
# slope_a = main stream average slope
# slope_a_error = some data in the original document had E after the numerical value.
#                I've put the E in a separate column (and assumed it meant error)


glimpse(response)


# Map gauges --------------------------------------------------------------


# You need an API key for this to work see ?register_google

aust_map <- ggmap(get_map(location = c(lon =147 , lat = -29), zoom = 5), extent = 'device')
# Centre on the half way up the East Coast

aust_map

# Add points were we have the data
p <- aust_map +
  geom_point(aes(x = longitude, y = latitude), colour = 'red', size = 0.025, data = response) +
  theme_minimal(base_size=5) +
  labs(x = NULL, y = NULL)

p
# Save for webpage

ggsave(file.path('../figures', 'gauge_map.png'), p, width = 4, height = 4)


# Response time as a function of area -------------------------------------


response %>%
  summarise(min(area), max(area), min(time_hrs), max(time_hrs))

# # A tibble: 1 x 4
#         `min(area)` `max(area)` `min(time_hrs)` `max(time_hrs)`
# <dbl>       <dbl>           <dbl>           <dbl>
#   1        0.04        1092            0.15            16.4


m_area <- lm(log10(time_hrs) ~ log10(area), data = response)

# Coefficients:
#   (Intercept)  log10(area)
# -0.1215       0.3830

summary(m_area)

# Call:
#   lm(formula = log10(time_hrs) ~ log10(area), data = response)
#
# Residuals:
#   Min       1Q   Median       3Q      Max
# -0.54083 -0.09732  0.01513  0.13669  0.36459
#
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
# (Intercept) -0.12150    0.03000  -4.049 0.000105 ***
#   log10(area)  0.38298    0.01657  23.119  < 2e-16 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1878 on 94 degrees of freedom
# Multiple R-squared:  0.8504,	Adjusted R-squared:  0.8488
# F-statistic: 534.5 on 1 and 94 DF,  p-value: < 2.2e-16

# Intercept


10^coef(m_area)[1]
#0.7560973

# Power
coef(m_area)[2]
# log10(area)
# 0.3828074

# So the fitted equation is the same as Pilgrim and McDermott
# 0.76A^0.38


p <- response %>% ggplot(aes(x = area, y = time_hrs)) +
  geom_point(size = 0.1) +
  scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
                  labels = scales::comma) +
  scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.01, 30)) +
  geom_smooth(method = lm, se = FALSE, size = 0.3)

p

# Add prediction intervals

area_seq <-   data_frame(area = seq(0.05, 1200, length.out = 1000)) # sequence for plotting


predict_df <- as.data.frame(predict.lm(m_area,
                        newdata = area_seq,
                        interval = 'prediction'))  %>%
  bind_cols(area_seq) %>%
  mutate(fit = 10^fit,  # transform so points plot correctly
         lwr = 10^lwr,
         upr = 10^upr)


# Add upper and lower prediction intervals to plot

p <- p +
  geom_line(data = predict_df, aes(x = area, y = lwr),
            colour = 'black',
            size = 0.1,
            linetype = 'dashed') +
  geom_line(data = predict_df, aes(x = area, y = upr),
            colour = 'black',
            size = 0.1,
            linetype = 'dashed') +
  theme_grey(base_size = 7)

p

ggsave(file.path('../figures', 'toc_area.png'), p, width = 4, height = 3)


# Prediction interval for a 100 km-sq catchment

10^predict.lm(m_area, newdata = data_frame(area = 100), interval = 'prediction')
#     fit      lwr      upr
#  4.407502 1.848486 10.50918

# plot predicted and actual characteristic time

# Natural scale
response %>%
  mutate(PM_predict = 10^predict(m_area)) %>%
  ggplot(aes(x = PM_predict, y = time_hrs)) +
  geom_point() +
  geom_smooth( se = FALSE, linetype = 'dashed') +
  geom_abline(slope = 1, intercept = 0) +
  scale_x_continuous(name = 'Predicted time (h)') +
  scale_y_continuous(name = 'Observed time (h)')


# Log scale
response %>%
  mutate(PM_predict = 10^predict(m_area)) %>%
  ggplot(aes(x = PM_predict, y = time_hrs)) +
  geom_point() +
  geom_smooth( se = FALSE, linetype = 'dashed') +
  geom_abline(slope = 1, intercept = 0) +
  scale_x_log_eng(name = 'Pilgrim McDermott Predicted time (h)') +
  scale_y_log_eng(name = 'Observed time (h)') +
  annotate(geom = 'text', x = 10, y = 0.3, label = 'data', hjust = 'left') +
  annotate(geom = 'segment', x = 7, xend = 9,
           y = 0.3, yend = 0.3,
           colour = 'blue',
           linetype = 'dashed') +
  annotate(geom = 'text', x = 10, y = 0.4, label = '1:1', hjust = 'left') +
  annotate(geom = 'segment', x = 7, xend = 9,
           y = 0.4, yend = 0.4,
           colour = 'black')


# Nash-Sutcliffe coefficient of efficiency
response %>%
  mutate(PM_predict = 10^predict(m_area)) %>%
  summarise(nse = hydroGOF::NSE(PM_predict, time_hrs))
# # A tibble: 1 x 1
# nse
# <dbl>
#   1 0.696

# Calculating NSE in the log domain
response %>%
  mutate(PM_predict = 10^predict(m_area)) %>%
  summarise(nse = hydroGOF::NSE(PM_predict, time_hrs, FUN = log10))
# 0.848


# Catchment response as a function of slope -------------------------------

response %>%
  summarise(min(slope_e), max(slope_e), min(time_hrs), max(time_hrs))

#           `min(slope_e)` `max(slope_e)` `min(time_hrs)` `max(time_hrs)`
# <dbl>          <dbl>           <dbl>           <dbl>
#   1           1.38            201            0.15            16.4


p <- response %>% ggplot(aes(x = slope_e, y = time_hrs)) +
  geom_point(size = 0.1) +
  scale_x_log_eng(name = 'Slope (m/km)',  #limits = c(1, 150)
                  labels = scales::comma) +
  scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.1, 22)) +
  geom_smooth(method = lm, se = FALSE, size = 0.3) +
  theme_grey(base_size = 7)


p

ggsave(file.path('../figures', 'toc_slope.png'), p, width = 4, height = 3)

m_slope <- lm(log(time_hrs) ~ log(slope_e), data = response)
summary(m_slope)

# Call:
#   lm(formula = log(time_hrs) ~ log(slope_e), data = response)
#
# Residuals:
#   Min      1Q  Median      3Q     Max
# -1.6480 -0.5351  0.1033  0.5338  1.4076
#
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
# (Intercept)   3.24165    0.19175   16.91   <2e-16 ***
#   log(slope_e) -0.76101    0.05941  -12.81   <2e-16 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.6747 on 94 degrees of freedom
# Multiple R-squared:  0.6358,	Adjusted R-squared:  0.6319
# F-statistic: 164.1 on 1 and 94 DF,  p-value: < 2.2e-16


# Response as a function of stream_length ---------------------------------


response %>%
  summarise(min(stream_length), max(stream_length))

# # A tibble: 1 x 2
# `min(stream_length)` `max(stream_length)`
# <dbl>                <dbl>
#   1                 0.33                  123
# >


p <- response %>%
  ggplot(aes(x = stream_length, y = time_hrs)) +
  geom_point(size = 0.1) +
  scale_x_continuous(name = 'Stream length (km)') +
  scale_y_continuous(name = 'Catchment response time (hours)') +
  geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
  theme_gray(base_size = 7)


ggsave(file.path('../figures', 'toc_stream_length.png'), p, width = 4, height = 3)


m_stream_length <- lm(time_hrs ~ stream_length, data = response)
summary(m_stream_length)


# Relationship between area and stream_length -----------------------------

p <- response %>%
  ggplot(aes(x = area, y = stream_length)) +
  geom_point(size = 0.1) +
  scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
                  labels = scales::comma) +
  scale_y_log_eng(name = 'Stream length (km)',
                  labels = scales::comma) +
  geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
  theme_gray(base_size = 7)

ggsave(file.path('../figures', 'area_stream_length.png'), p, width = 4, height = 3)


m_area_stream_length <- lm(log10(stream_length) ~ log10(area), data = response)
summary(m_area_stream_length)

# Call:
#   lm(formula = log10(stream_length) ~ log10(area), data = response)
#
# Residuals:
#   Min        1Q    Median        3Q       Max
# -0.233840 -0.055750  0.003031  0.060985  0.220033
#
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.233183   0.014462   16.12   <2e-16 ***
#   log10(area) 0.566083   0.008041   70.40   <2e-16 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.09047 on 93 degrees of freedom
# Multiple R-squared:  0.9816,	Adjusted R-squared:  0.9814
# F-statistic:  4956 on 1 and 93 DF,  p-value: < 2.2e-16


# Log scale
response %>%
  ggplot(aes(x = area, y = stream_length)) +
  geom_point() +
  scale_x_continuous(name = bquote('Area ('*km^2*')'),
                     limits = c(0.01, 2000),
                     trans = 'sqrt',
                     labels = scales::comma) +
  scale_y_continuous(name = 'Stream length (km)',
                  labels = scales::comma) +
  geom_smooth()


# Relationship between area and slope -------------------------------------

response %>%
  ggplot(aes(x = area, y = slope_e)) +
  geom_point() +
  scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
                  labels = scales::comma) +
  scale_y_log_eng(name = 'Slope',  limits = c(1, 220),
                  labels = scales::comma) +
  geom_smooth()


# Web version
p <- response %>%
  ggplot(aes(x = area, y = slope_e)) +
  geom_point(size = 0.1) +
  scale_x_log_eng(name = bquote('Area ('*km^2*')'), limits = c(0.01, 2000),
                  labels = scales::comma) +
  scale_y_log_eng(name = 'Slope (m/km)',  limits = c(1, 220),
                  labels = scales::comma) +
  geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
  theme_gray(base_size = 7)

ggsave(file.path('../figures', 'area_slope.png'), p, width = 4, height = 3)


m_area_slope <- lm(log(slope_e) ~ log(area), data = response)
summary(m_area_slope)

# Call:
#   lm(formula = log(slope_e) ~ log(area), data = response)
#
# Residuals:
#   Min       1Q   Median       3Q      Max
# -1.92470 -0.40829  0.03406  0.44907  1.36601
#
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
# (Intercept)  4.15078    0.10837   38.30   <2e-16 ***
#   log(area)   -0.35477    0.02599  -13.65   <2e-16 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.6782 on 94 degrees of freedom
# Multiple R-squared:  0.6647,	Adjusted R-squared:  0.6612
# F-statistic: 186.4 on 1 and 94 DF,  p-value: < 2.2e-16

# Slope decreases as catchmen area increases


# Catchment response as a function of slope and area -------------------------------

m_slope_area <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area), data = response)
summary(m_slope_area)

# Call:
#   lm(formula = log10(time_hrs) ~ log10(slope_e) + log10(area),
#      data = response)
#
# Residuals:
#   Min       1Q   Median       3Q      Max
# -0.51656 -0.07466  0.02242  0.12641  0.35864
#
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
# (Intercept)     0.11195    0.12035   0.930   0.3547
# log10(slope_e) -0.12950    0.06472  -2.001   0.0483 *
#   log10(area)     0.33704    0.02816  11.968   <2e-16 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1848 on 93 degrees of freedom
# Multiple R-squared:  0.8566,	Adjusted R-squared:  0.8535
# F-statistic: 277.8 on 2 and 93 DF,  p-value: < 2.2e-16


# Catchment response as a function area + slope + stream length --------------------------------------------

m_slope_area_stream_length <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area) + stream_length, data = response)
summary(m_slope_area_stream_length)

# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
# (Intercept)     0.1452158  0.1298496   1.118   0.2664
# log10(slope_e) -0.1482061  0.0702580  -2.109   0.0377 *
#   log10(area)     0.3457835  0.0308687  11.202   <2e-16 ***
#   stream_length  -0.0008966  0.0012429  -0.721   0.4725
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1863 on 91 degrees of freedom
# Multiple R-squared:  0.8555,	Adjusted R-squared:  0.8507
# F-statistic: 179.6 on 3 and 91 DF,  p-value: < 2.2e-16

# No value in including stream length


# Best model --------------------------------------------------------------

# McDermott and Pilgrim decided to use a model just based on area, rather than slope and area
# Lets see if that is justified


anova(m_slope_area, m_area)

# Analysis of Variance Table
#
# Model 1: log10(time_hrs) ~ log10(slope_e) + log10(area)
# Model 2: log10(time_hrs) ~ log10(area)
# Res.Df    RSS Df Sum of Sq      F  Pr(>F)
# 1     93 3.1771
# 2     94 3.3139 -1  -0.13677 4.0034 0.04832 *

# Some value in including area but not much

AIC(m_slope)
# 200.9

AIC(m_area)
# -44.7

AIC(m_slope_area)
# -46.8

BIC(m_slope)
# 208.5

BIC(m_area)
# -37.0

BIC(m_slope_area)
# -36.5

# So leaving slope out is reasonable.
# AIC suggests including slope and area
# BIC suggests just area
# The adjusted R-squared is
# 0.854 with slope and area
# 0.849 with just slope
	library(ggmap)
	library(hydroGOF)
	library(tidyverse)
	library(readxl)
	library(ggrepel)


	# Analysis of time of concentration data
	# Source: Appendix F of McDarmott, G. E. and Pilgrim, D. H. (1982) Design flood estimation for small catchments in NSW
	# Department of National Development and Energy
	# Australian Water Resources Council
	# Research Project No. 78/104
	# Technical Paper No. 73
	# Australian Government Publishing Service
	# Canberra, 1982
	# Authors are at the School of Civil Engineering, The University of New South Wales



	# Functions ---------------------------------------------------------------

	# function to plot logarithmic minor breaks
	# from https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot


	log_breaks = function(maj, radix=10) {
	function(x) {
	minx = floor(min(logb(x,radix), na.rm=T)) - 1
	maxx = ceiling(max(logb(x,radix), na.rm=T)) + 1
	n_major = maxx - minx + 1
	major_breaks = seq(minx, maxx, by=1)
	if (maj) {
	breaks = major_breaks
	} else {
	steps = logb(1:(radix-1),radix)
	breaks = rep(steps, times=n_major) +
	rep(major_breaks, each=radix-1)
	}
	radix^breaks
	}




	}
	scale_x_log_eng = function(..., radix=10) {
	scale_x_continuous(...,
	trans=scales::log_trans(radix),
	breaks=log_breaks(TRUE, radix),
	minor_breaks=log_breaks(FALSE, radix))
	}
	scale_y_log_eng = function(..., radix=10) {
	scale_y_continuous(...,
	trans=scales::log_trans(radix),
	breaks=log_breaks(TRUE, radix),
	minor_breaks=log_breaks(FALSE, radix))
	}




	# Load and munge data -----------------------------------------------------



	# Load data from google drive

	id <- "1haWTujT0XazONED5ZwH9O0yRcuet6QSc"
	response <- read_csv(file = sprintf("https://docs.google.com/uc?id=%s&export=download", id),
	skip = 34,
	col_names = FALSE)


	names(response) <- c('gauge_num', 'station_name', 'waterway_name',
	'latitude', 'longitude', 'authority',
	'time_hrs', 'source', 'area_alt', 'area',
	'stream_length', 'slope_e', 'slope_e_error',
	'slope_a', 'slope_a_error')

	# gauge_num = gauge number
	# station_name = station name
	# waterway_name = waterway name
	# latitude = latitude
	# longitude = longitude
	# authority = authority
	# time_hrs = catchment response time in hours
	# source = source of data (see data file)
	# area_alt = catchment area obtained from Bureau of Meteorlogy
	# either from the Water Resources Station Catalogue or Water Data Online (sq-km)
	# area = area from the original source (se_km)
	# slope_e = equal area slope
	# slop_e_error = some data in the original document had E after the numerical value.
	# I've put the E in a separate column (and assumed it meant error)
	# slope_a = main stream average slope
	# slope_a_error = some data in the original document had E after the numerical value.
	# I've put the E in a separate column (and assumed it meant error)




	glimpse(response)


	# Map gauges --------------------------------------------------------------


	# You need an API key for this to work see ?register_google

	aust_map <- ggmap(get_map(location = c(lon =147 , lat = -29), zoom = 5), extent = 'device')
	# Centre on the half way up the East Coast

	aust_map

	# Add points were we have the data
	p <- aust_map +
	geom_point(aes(x = longitude, y = latitude), colour = 'red', size = 0.025, data = response) +
	theme_minimal(base_size=5) +
	labs(x = NULL, y = NULL)

	p
	# Save for webpage

	ggsave(file.path('../figures', 'gauge_map.png'), p, width = 4, height = 4)



	# Response time as a function of area -------------------------------------


	response %>%
	summarise(min(area), max(area), min(time_hrs), max(time_hrs))

	# # A tibble: 1 x 4
	# `min(area)` `max(area)` `min(time_hrs)` `max(time_hrs)`
	# <dbl> <dbl> <dbl> <dbl>
	# 1 0.04 1092 0.15 16.4



	m_area <- lm(log10(time_hrs) ~ log10(area), data = response)

	# Coefficients:
	# (Intercept) log10(area)
	# -0.1215 0.3830

	summary(m_area)

	# Call:
	# lm(formula = log10(time_hrs) ~ log10(area), data = response)
	#
	# Residuals:
	# Min 1Q Median 3Q Max
	# -0.54083 -0.09732 0.01513 0.13669 0.36459
	#
	# Coefficients:
	# Estimate Std. Error t value Pr(>\|t\|)
	# (Intercept) -0.12150 0.03000 -4.049 0.000105 ***
	# log10(area) 0.38298 0.01657 23.119 < 2e-16 ***
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.1878 on 94 degrees of freedom
	# Multiple R-squared: 0.8504, Adjusted R-squared: 0.8488
	# F-statistic: 534.5 on 1 and 94 DF, p-value: < 2.2e-16

	# Intercept


	10^coef(m_area)[1]
	#0.7560973

	# Power
	coef(m_area)[2]
	# log10(area)
	# 0.3828074

	# So the fitted equation is the same as Pilgrim and McDermott
	# 0.76A^0.38


	p <- response %>% ggplot(aes(x = area, y = time_hrs)) +
	geom_point(size = 0.1) +
	scale_x_log_eng(name = bquote('Area ('km^2')'), limits = c(0.01, 2000),
	labels = scales::comma) +
	scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.01, 30)) +
	geom_smooth(method = lm, se = FALSE, size = 0.3)

	p

	# Add prediction intervals

	area_seq <- data_frame(area = seq(0.05, 1200, length.out = 1000)) # sequence for plotting



	predict_df <- as.data.frame(predict.lm(m_area,
	newdata = area_seq,
	interval = 'prediction')) %>%
	bind_cols(area_seq) %>%
	mutate(fit = 10^fit, # transform so points plot correctly
	lwr = 10^lwr,
	upr = 10^upr)



	# Add upper and lower prediction intervals to plot

	p <- p +
	geom_line(data = predict_df, aes(x = area, y = lwr),
	colour = 'black',
	size = 0.1,
	linetype = 'dashed') +
	geom_line(data = predict_df, aes(x = area, y = upr),
	colour = 'black',
	size = 0.1,
	linetype = 'dashed') +
	theme_grey(base_size = 7)

	p

	ggsave(file.path('../figures', 'toc_area.png'), p, width = 4, height = 3)


	# Prediction interval for a 100 km-sq catchment

	10^predict.lm(m_area, newdata = data_frame(area = 100), interval = 'prediction')
	# fit lwr upr
	# 4.407502 1.848486 10.50918

	# plot predicted and actual characteristic time

	# Natural scale
	response %>%
	mutate(PM_predict = 10^predict(m_area)) %>%
	ggplot(aes(x = PM_predict, y = time_hrs)) +
	geom_point() +
	geom_smooth( se = FALSE, linetype = 'dashed') +
	geom_abline(slope = 1, intercept = 0) +
	scale_x_continuous(name = 'Predicted time (h)') +
	scale_y_continuous(name = 'Observed time (h)')


	# Log scale
	response %>%
	mutate(PM_predict = 10^predict(m_area)) %>%
	ggplot(aes(x = PM_predict, y = time_hrs)) +
	geom_point() +
	geom_smooth( se = FALSE, linetype = 'dashed') +
	geom_abline(slope = 1, intercept = 0) +
	scale_x_log_eng(name = 'Pilgrim McDermott Predicted time (h)') +
	scale_y_log_eng(name = 'Observed time (h)') +
	annotate(geom = 'text', x = 10, y = 0.3, label = 'data', hjust = 'left') +
	annotate(geom = 'segment', x = 7, xend = 9,
	y = 0.3, yend = 0.3,
	colour = 'blue',
	linetype = 'dashed') +
	annotate(geom = 'text', x = 10, y = 0.4, label = '1:1', hjust = 'left') +
	annotate(geom = 'segment', x = 7, xend = 9,
	y = 0.4, yend = 0.4,
	colour = 'black')


	# Nash-Sutcliffe coefficient of efficiency
	response %>%
	mutate(PM_predict = 10^predict(m_area)) %>%
	summarise(nse = hydroGOF::NSE(PM_predict, time_hrs))
	# # A tibble: 1 x 1
	# nse
	# <dbl>
	# 1 0.696

	# Calculating NSE in the log domain
	response %>%
	mutate(PM_predict = 10^predict(m_area)) %>%
	summarise(nse = hydroGOF::NSE(PM_predict, time_hrs, FUN = log10))
	# 0.848


	# Catchment response as a function of slope -------------------------------

	response %>%
	summarise(min(slope_e), max(slope_e), min(time_hrs), max(time_hrs))

	# `min(slope_e)` `max(slope_e)` `min(time_hrs)` `max(time_hrs)`
	# <dbl> <dbl> <dbl> <dbl>
	# 1 1.38 201 0.15 16.4




	p <- response %>% ggplot(aes(x = slope_e, y = time_hrs)) +
	geom_point(size = 0.1) +
	scale_x_log_eng(name = 'Slope (m/km)', #limits = c(1, 150)
	labels = scales::comma) +
	scale_y_log_eng(name = 'Catchment response time (hours)', limits = c(0.1, 22)) +
	geom_smooth(method = lm, se = FALSE, size = 0.3) +
	theme_grey(base_size = 7)


	p

	ggsave(file.path('../figures', 'toc_slope.png'), p, width = 4, height = 3)

	m_slope <- lm(log(time_hrs) ~ log(slope_e), data = response)
	summary(m_slope)

	# Call:
	# lm(formula = log(time_hrs) ~ log(slope_e), data = response)
	#
	# Residuals:
	# Min 1Q Median 3Q Max
	# -1.6480 -0.5351 0.1033 0.5338 1.4076
	#
	# Coefficients:
	# Estimate Std. Error t value Pr(>\|t\|)
	# (Intercept) 3.24165 0.19175 16.91 <2e-16 ***
	# log(slope_e) -0.76101 0.05941 -12.81 <2e-16 ***
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.6747 on 94 degrees of freedom
	# Multiple R-squared: 0.6358, Adjusted R-squared: 0.6319
	# F-statistic: 164.1 on 1 and 94 DF, p-value: < 2.2e-16



	# Response as a function of stream_length ---------------------------------



	response %>%
	summarise(min(stream_length), max(stream_length))

	# # A tibble: 1 x 2
	# `min(stream_length)` `max(stream_length)`
	# <dbl> <dbl>
	# 1 0.33 123
	# >


	p <- response %>%
	ggplot(aes(x = stream_length, y = time_hrs)) +
	geom_point(size = 0.1) +
	scale_x_continuous(name = 'Stream length (km)') +
	scale_y_continuous(name = 'Catchment response time (hours)') +
	geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
	theme_gray(base_size = 7)


	ggsave(file.path('../figures', 'toc_stream_length.png'), p, width = 4, height = 3)


	m_stream_length <- lm(time_hrs ~ stream_length, data = response)
	summary(m_stream_length)


	# Relationship between area and stream_length -----------------------------

	p <- response %>%
	ggplot(aes(x = area, y = stream_length)) +
	geom_point(size = 0.1) +
	scale_x_log_eng(name = bquote('Area ('km^2')'), limits = c(0.01, 2000),
	labels = scales::comma) +
	scale_y_log_eng(name = 'Stream length (km)',
	labels = scales::comma) +
	geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
	theme_gray(base_size = 7)

	ggsave(file.path('../figures', 'area_stream_length.png'), p, width = 4, height = 3)


	m_area_stream_length <- lm(log10(stream_length) ~ log10(area), data = response)
	summary(m_area_stream_length)

	# Call:
	# lm(formula = log10(stream_length) ~ log10(area), data = response)
	#
	# Residuals:
	# Min 1Q Median 3Q Max
	# -0.233840 -0.055750 0.003031 0.060985 0.220033
	#
	# Coefficients:
	# Estimate Std. Error t value Pr(>\|t\|)
	# (Intercept) 0.233183 0.014462 16.12 <2e-16 ***
	# log10(area) 0.566083 0.008041 70.40 <2e-16 ***
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.09047 on 93 degrees of freedom
	# Multiple R-squared: 0.9816, Adjusted R-squared: 0.9814
	# F-statistic: 4956 on 1 and 93 DF, p-value: < 2.2e-16



	# Log scale
	response %>%
	ggplot(aes(x = area, y = stream_length)) +
	geom_point() +
	scale_x_continuous(name = bquote('Area ('km^2')'),
	limits = c(0.01, 2000),
	trans = 'sqrt',
	labels = scales::comma) +
	scale_y_continuous(name = 'Stream length (km)',
	labels = scales::comma) +
	geom_smooth()




	# Relationship between area and slope -------------------------------------

	response %>%
	ggplot(aes(x = area, y = slope_e)) +
	geom_point() +
	scale_x_log_eng(name = bquote('Area ('km^2')'), limits = c(0.01, 2000),
	labels = scales::comma) +
	scale_y_log_eng(name = 'Slope', limits = c(1, 220),
	labels = scales::comma) +
	geom_smooth()


	# Web version
	p <- response %>%
	ggplot(aes(x = area, y = slope_e)) +
	geom_point(size = 0.1) +
	scale_x_log_eng(name = bquote('Area ('km^2')'), limits = c(0.01, 2000),
	labels = scales::comma) +
	scale_y_log_eng(name = 'Slope (m/km)', limits = c(1, 220),
	labels = scales::comma) +
	geom_smooth(method = 'lm', se = FALSE, size = 0.3) +
	theme_gray(base_size = 7)

	ggsave(file.path('../figures', 'area_slope.png'), p, width = 4, height = 3)




	m_area_slope <- lm(log(slope_e) ~ log(area), data = response)
	summary(m_area_slope)

	# Call:
	# lm(formula = log(slope_e) ~ log(area), data = response)
	#
	# Residuals:
	# Min 1Q Median 3Q Max
	# -1.92470 -0.40829 0.03406 0.44907 1.36601
	#
	# Coefficients:
	# Estimate Std. Error t value Pr(>\|t\|)
	# (Intercept) 4.15078 0.10837 38.30 <2e-16 ***
	# log(area) -0.35477 0.02599 -13.65 <2e-16 ***
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.6782 on 94 degrees of freedom
	# Multiple R-squared: 0.6647, Adjusted R-squared: 0.6612
	# F-statistic: 186.4 on 1 and 94 DF, p-value: < 2.2e-16

	# Slope decreases as catchmen area increases


	# Catchment response as a function of slope and area -------------------------------

	m_slope_area <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area), data = response)
	summary(m_slope_area)

	# Call:
	# lm(formula = log10(time_hrs) ~ log10(slope_e) + log10(area),
	# data = response)
	#
	# Residuals:
	# Min 1Q Median 3Q Max
	# -0.51656 -0.07466 0.02242 0.12641 0.35864
	#
	# Coefficients:
	# Estimate Std. Error t value Pr(>\|t\|)
	# (Intercept) 0.11195 0.12035 0.930 0.3547
	# log10(slope_e) -0.12950 0.06472 -2.001 0.0483 *
	# log10(area) 0.33704 0.02816 11.968 <2e-16 ***
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.1848 on 93 degrees of freedom
	# Multiple R-squared: 0.8566, Adjusted R-squared: 0.8535
	# F-statistic: 277.8 on 2 and 93 DF, p-value: < 2.2e-16



	# Catchment response as a function area + slope + stream length --------------------------------------------

	m_slope_area_stream_length <- lm(log10(time_hrs) ~ log10(slope_e) + log10(area) + stream_length, data = response)
	summary(m_slope_area_stream_length)

	# Coefficients:
	# Estimate Std. Error t value Pr(>\|t\|)
	# (Intercept) 0.1452158 0.1298496 1.118 0.2664
	# log10(slope_e) -0.1482061 0.0702580 -2.109 0.0377 *
	# log10(area) 0.3457835 0.0308687 11.202 <2e-16 ***
	# stream_length -0.0008966 0.0012429 -0.721 0.4725
	# ---
	# Signif. codes: 0 ‘*’ 0.001 ‘’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.1863 on 91 degrees of freedom
	# Multiple R-squared: 0.8555, Adjusted R-squared: 0.8507
	# F-statistic: 179.6 on 3 and 91 DF, p-value: < 2.2e-16

	# No value in including stream length


	# Best model --------------------------------------------------------------

	# McDermott and Pilgrim decided to use a model just based on area, rather than slope and area
	# Lets see if that is justified


	anova(m_slope_area, m_area)

	# Analysis of Variance Table
	#
	# Model 1: log10(time_hrs) ~ log10(slope_e) + log10(area)
	# Model 2: log10(time_hrs) ~ log10(area)
	# Res.Df RSS Df Sum of Sq F Pr(>F)
	# 1 93 3.1771
	# 2 94 3.3139 -1 -0.13677 4.0034 0.04832 *

	# Some value in including area but not much

	AIC(m_slope)
	# 200.9

	AIC(m_area)
	# -44.7

	AIC(m_slope_area)
	# -46.8

	BIC(m_slope)
	# 208.5

	BIC(m_area)
	# -37.0

	BIC(m_slope_area)
	# -36.5

	# So leaving slope out is reasonable.
	# AIC suggests including slope and area
	# BIC suggests just area
	# The adjusted R-squared is
	# 0.854 with slope and area
	# 0.849 with just slope