TonyLadson/FDC_example.R

## FDC_example.R
library(tidyverse)
library(lubridate)
library(assertthat)


# Helper functions --------------------------------------------------------

# Function to plot minor breaks at a log scale
# https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot
log10_minor_break = function (...){
  function(x) {
    minx         = floor(min(log10(x), na.rm=T))-1;
    maxx         = ceiling(max(log10(x), na.rm=T))+1;
    n_major      = maxx-minx+1;
    major_breaks = seq(minx, maxx, by=1)
    minor_breaks =
      rep(log10(seq(1, 9, by=1)), times = n_major)+
      rep(major_breaks, each = 9)
    return(10^(minor_breaks))
  }
}

log10_major_break = function (...){
  function(x) {
    minx         = floor(min(log10(x), na.rm=T))-1;
    maxx         = ceiling(max(log10(x), na.rm=T))+1;
    n_major      = maxx-minx+1;
    major_breaks = seq(minx, maxx, by=1)
    return(10^(major_breaks))
  }
}


#______________________________________________________________________________________
# Read in the data


#
# Site 404216 BROKEN RIVER @ GOORAMBAT (CASEY WEIR H. GAUGE) Lat:-36.47353488 Long:145.9417393
# Data on google drive
# https://drive.google.com/open?id=1-g1rOfeOxquPow4YM1aifdEkdBboUzu4


id <- "1-g1rOfeOxquPow4YM1aifdEkdBboUzu4" # google file ID
flow_daily <- read_csv(file =sprintf("https://docs.google.com/uc?id=%s&export=download", id),
                       col_types = cols(
                         X1 = col_character(),
                         X2 = col_double(),
                         X3 = col_integer(),
                         X4 = col_double(),
                         X5 = col_integer(),
                         X6 = col_double(),
                         X7 = col_integer()
                       ),
                       col_names = FALSE,
                       skip = 17
)

names(flow_daily) <- c('date',
                       'flow_mean',
                       'qcode_mean',
                       'flow_min',
                       'qcode_min',
                       'flow_max',
                       'qcode_max')


#______________________________________________________________________________________
# Checks
# Missing data

colSums(is.na(flow_daily))

# Remove first line (contains missing data and add columns we'll use later)

flow_daily <- flow_daily %>%
  slice(2:nrow(flow_daily)) %>%  # There is only one missing value, which is the first, so lets remove it.
  mutate(date = as.Date(str_sub(date, 1, 10), format = '%d/%m/%Y')) %>%  # Convert time, just you days
  mutate(flow_year = year(date)) %>%
  mutate(flow_month = month(date)) %>%
  mutate(jday = yday(date))


# Check time step
# should be consistent between all values
x <- flow_daily %>%
  mutate(date_diff = date - lag(date)) %>%
  count(date_diff) # ok
res <- assert_that(x[1,2] == nrow(flow_daily)-1)


# Look for incomplete years

flow_daily %>%
  mutate(flow_year = year(date)) %>%
  group_by(flow_year) %>%
  summarise(days_measured = n()) %>%
  filter(!(days_measured %in% c(365,366))) # Years without 365 or 366 days

# Delete 1972 and 2017 (first and last year)

flow_daily <- flow_daily %>%
  filter(!(flow_year %in% c(1972, 2017)))

# Check the occurance of Zero values
colSums(flow_daily == 0)
# date  flow_mean qcode_mean   flow_min  qcode_min   flow_max  qcode_max  flow_year flow_month
# 0        560          0        783          0        557          0          0          0
# jday
# 0


# Flow duration curves ----------------------------------------------------


# FDC using all flows

flow_daily %>%
  mutate(my_rank = 1- percent_rank(flow_mean)) %>%
  mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
  ggplot(aes(x = my_rank, y = flow_mean)) +
  geom_line() +
  scale_y_continuous(name = 'Mean daily flow (ML/d)',
                     trans = 'log10',
                     breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
                     minor_breaks = log10_minor_break(),
                     limits = c(0.01, 10000),
                     labels = c(0.01, 0.1, 1, 10, 100, 1000, '10,000')) +
  scale_x_continuous(name = 'Percentage of time flow exceeded',
                     trans = 'probit',
                     limits = c(0.01, 0.99),
                     breaks = c(0.01, 0.1,0.25,0.5,0.75, 0.9, 0.99),
                     labels = c('1%', '10%', '25%', '50%', '75%', '90%', '99%'))


# FDC for each year of record (in grey)

p1 <- flow_daily %>%
  group_by(flow_year) %>%
  mutate(my_rank = 1-percent_rank(flow_mean)) %>%
  mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
  ggplot(aes(x = my_rank, y = flow_mean, group = flow_year)) +
  geom_line(colour = 'grey70', size = 0.2)+
  scale_y_continuous(name = 'Mean daily flow (ML/d)',
                     trans = 'log10',
                     breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
                     minor_breaks = log10_minor_break(),
                     limits = c(1, 10000),
                     labels = c(0.01, 0.1, 1, 10, 100, 1000, '10,000')) +
  scale_x_continuous(name = 'Percentage of time flow exceeded',
                     trans = 'probit',
                     limits = c(0.01, 0.99),
                     breaks = c(0.01, 0.1,0.25,0.5,0.75, 0.9, 0.99),
                     labels = c('1%', '10%', '25%', '50%', '75%', '90%', '99%'))

p1

# with some years highlighted

year_highlights <- flow_daily %>%
  filter(flow_year %in% c(1976, 1993, 1974)) %>%
  group_by(flow_year) %>%
  mutate(my_rank = 1- percent_rank(flow_mean)) %>%
  mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank))


# FDC using all flows
fdc_all <- flow_daily %>%
  mutate(my_rank =  1-percent_rank(flow_mean) ) %>%
  mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
  mutate(my_rank = if_else(my_rank == 0, 0.0001, my_rank)) %>%
  select(my_rank, flow_mean)

fdc_all$flow_year <-  2000


# Overplot the selected years and the average for all years
p2 <- p1 +
  geom_line(data = year_highlights, aes(x = my_rank, y = flow_mean, colour = factor(flow_year)), size = 0.4) +
  scale_colour_discrete(name = NULL) +
  theme(legend.position = 'bottom',
        text = element_text(size = 8)) +
  geom_line(data = fdc_all, aes(my_rank, flow_mean ), colour = 'black', size = 0.4)


p2

#ggsave("../figures/fdc_example.png", p2, width=4, height=4)
	library(tidyverse)
	library(lubridate)
	library(assertthat)



	# Helper functions --------------------------------------------------------

	# Function to plot minor breaks at a log scale
	# https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot
	log10_minor_break = function (...){
	function(x) {
	minx = floor(min(log10(x), na.rm=T))-1;
	maxx = ceiling(max(log10(x), na.rm=T))+1;
	n_major = maxx-minx+1;
	major_breaks = seq(minx, maxx, by=1)
	minor_breaks =
	rep(log10(seq(1, 9, by=1)), times = n_major)+
	rep(major_breaks, each = 9)
	return(10^(minor_breaks))
	}
	}

	log10_major_break = function (...){
	function(x) {
	minx = floor(min(log10(x), na.rm=T))-1;
	maxx = ceiling(max(log10(x), na.rm=T))+1;
	n_major = maxx-minx+1;
	major_breaks = seq(minx, maxx, by=1)
	return(10^(major_breaks))
	}
	}



	#______________________________________________________________________________________
	# Read in the data



	#
	# Site 404216 BROKEN RIVER @ GOORAMBAT (CASEY WEIR H. GAUGE) Lat:-36.47353488 Long:145.9417393
	# Data on google drive
	# https://drive.google.com/open?id=1-g1rOfeOxquPow4YM1aifdEkdBboUzu4



	id <- "1-g1rOfeOxquPow4YM1aifdEkdBboUzu4" # google file ID
	flow_daily <- read_csv(file =sprintf("https://docs.google.com/uc?id=%s&export=download", id),
	col_types = cols(
	X1 = col_character(),
	X2 = col_double(),
	X3 = col_integer(),
	X4 = col_double(),
	X5 = col_integer(),
	X6 = col_double(),
	X7 = col_integer()
	),
	col_names = FALSE,
	skip = 17
	)

	names(flow_daily) <- c('date',
	'flow_mean',
	'qcode_mean',
	'flow_min',
	'qcode_min',
	'flow_max',
	'qcode_max')



	#______________________________________________________________________________________
	# Checks
	# Missing data

	colSums(is.na(flow_daily))

	# Remove first line (contains missing data and add columns we'll use later)

	flow_daily <- flow_daily %>%
	slice(2:nrow(flow_daily)) %>% # There is only one missing value, which is the first, so lets remove it.
	mutate(date = as.Date(str_sub(date, 1, 10), format = '%d/%m/%Y')) %>% # Convert time, just you days
	mutate(flow_year = year(date)) %>%
	mutate(flow_month = month(date)) %>%
	mutate(jday = yday(date))



	# Check time step
	# should be consistent between all values
	x <- flow_daily %>%
	mutate(date_diff = date - lag(date)) %>%
	count(date_diff) # ok
	res <- assert_that(x[1,2] == nrow(flow_daily)-1)


	# Look for incomplete years

	flow_daily %>%
	mutate(flow_year = year(date)) %>%
	group_by(flow_year) %>%
	summarise(days_measured = n()) %>%
	filter(!(days_measured %in% c(365,366))) # Years without 365 or 366 days

	# Delete 1972 and 2017 (first and last year)

	flow_daily <- flow_daily %>%
	filter(!(flow_year %in% c(1972, 2017)))

	# Check the occurance of Zero values
	colSums(flow_daily == 0)
	# date flow_mean qcode_mean flow_min qcode_min flow_max qcode_max flow_year flow_month
	# 0 560 0 783 0 557 0 0 0
	# jday
	# 0



	# Flow duration curves ----------------------------------------------------


	# FDC using all flows

	flow_daily %>%
	mutate(my_rank = 1- percent_rank(flow_mean)) %>%
	mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
	ggplot(aes(x = my_rank, y = flow_mean)) +
	geom_line() +
	scale_y_continuous(name = 'Mean daily flow (ML/d)',
	trans = 'log10',
	breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
	minor_breaks = log10_minor_break(),
	limits = c(0.01, 10000),
	labels = c(0.01, 0.1, 1, 10, 100, 1000, '10,000')) +
	scale_x_continuous(name = 'Percentage of time flow exceeded',
	trans = 'probit',
	limits = c(0.01, 0.99),
	breaks = c(0.01, 0.1,0.25,0.5,0.75, 0.9, 0.99),
	labels = c('1%', '10%', '25%', '50%', '75%', '90%', '99%'))



	# FDC for each year of record (in grey)

	p1 <- flow_daily %>%
	group_by(flow_year) %>%
	mutate(my_rank = 1-percent_rank(flow_mean)) %>%
	mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
	ggplot(aes(x = my_rank, y = flow_mean, group = flow_year)) +
	geom_line(colour = 'grey70', size = 0.2)+
	scale_y_continuous(name = 'Mean daily flow (ML/d)',
	trans = 'log10',
	breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
	minor_breaks = log10_minor_break(),
	limits = c(1, 10000),
	labels = c(0.01, 0.1, 1, 10, 100, 1000, '10,000')) +
	scale_x_continuous(name = 'Percentage of time flow exceeded',
	trans = 'probit',
	limits = c(0.01, 0.99),
	breaks = c(0.01, 0.1,0.25,0.5,0.75, 0.9, 0.99),
	labels = c('1%', '10%', '25%', '50%', '75%', '90%', '99%'))

	p1

	# with some years highlighted

	year_highlights <- flow_daily %>%
	filter(flow_year %in% c(1976, 1993, 1974)) %>%
	group_by(flow_year) %>%
	mutate(my_rank = 1- percent_rank(flow_mean)) %>%
	mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank))


	# FDC using all flows
	fdc_all <- flow_daily %>%
	mutate(my_rank = 1-percent_rank(flow_mean) ) %>%
	mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
	mutate(my_rank = if_else(my_rank == 0, 0.0001, my_rank)) %>%
	select(my_rank, flow_mean)

	fdc_all$flow_year <- 2000


	# Overplot the selected years and the average for all years
	p2 <- p1 +
	geom_line(data = year_highlights, aes(x = my_rank, y = flow_mean, colour = factor(flow_year)), size = 0.4) +
	scale_colour_discrete(name = NULL) +
	theme(legend.position = 'bottom',
	text = element_text(size = 8)) +
	geom_line(data = fdc_all, aes(my_rank, flow_mean ), colour = 'black', size = 0.4)


	p2

	#ggsave("../figures/fdc_example.png", p2, width=4, height=4)