Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active December 4, 2018 03:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/086e20f10103baa0baa81f55302b1962 to your computer and use it in GitHub Desktop.
Save TonyLadson/086e20f10103baa0baa81f55302b1962 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(lubridate)
library(assertthat)
# Helper functions --------------------------------------------------------
# Function to plot minor breaks at a log scale
# https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot
log10_minor_break = function (...){
function(x) {
minx = floor(min(log10(x), na.rm=T))-1;
maxx = ceiling(max(log10(x), na.rm=T))+1;
n_major = maxx-minx+1;
major_breaks = seq(minx, maxx, by=1)
minor_breaks =
rep(log10(seq(1, 9, by=1)), times = n_major)+
rep(major_breaks, each = 9)
return(10^(minor_breaks))
}
}
log10_major_break = function (...){
function(x) {
minx = floor(min(log10(x), na.rm=T))-1;
maxx = ceiling(max(log10(x), na.rm=T))+1;
n_major = maxx-minx+1;
major_breaks = seq(minx, maxx, by=1)
return(10^(major_breaks))
}
}
#______________________________________________________________________________________
# Read in the data
#
# Site 404216 BROKEN RIVER @ GOORAMBAT (CASEY WEIR H. GAUGE) Lat:-36.47353488 Long:145.9417393
# Data on google drive
# https://drive.google.com/open?id=1-g1rOfeOxquPow4YM1aifdEkdBboUzu4
id <- "1-g1rOfeOxquPow4YM1aifdEkdBboUzu4" # google file ID
flow_daily <- read_csv(file =sprintf("https://docs.google.com/uc?id=%s&export=download", id),
col_types = cols(
X1 = col_character(),
X2 = col_double(),
X3 = col_integer(),
X4 = col_double(),
X5 = col_integer(),
X6 = col_double(),
X7 = col_integer()
),
col_names = FALSE,
skip = 17
)
names(flow_daily) <- c('date',
'flow_mean',
'qcode_mean',
'flow_min',
'qcode_min',
'flow_max',
'qcode_max')
#______________________________________________________________________________________
# Checks
# Missing data
colSums(is.na(flow_daily))
# Remove first line (contains missing data and add columns we'll use later)
flow_daily <- flow_daily %>%
slice(2:nrow(flow_daily)) %>% # There is only one missing value, which is the first, so lets remove it.
mutate(date = as.Date(str_sub(date, 1, 10), format = '%d/%m/%Y')) %>% # Convert time, just you days
mutate(flow_year = year(date)) %>%
mutate(flow_month = month(date)) %>%
mutate(jday = yday(date))
# Check time step
# should be consistent between all values
x <- flow_daily %>%
mutate(date_diff = date - lag(date)) %>%
count(date_diff) # ok
res <- assert_that(x[1,2] == nrow(flow_daily)-1)
# Look for incomplete years
flow_daily %>%
mutate(flow_year = year(date)) %>%
group_by(flow_year) %>%
summarise(days_measured = n()) %>%
filter(!(days_measured %in% c(365,366))) # Years without 365 or 366 days
# Delete 1972 and 2017 (first and last year)
flow_daily <- flow_daily %>%
filter(!(flow_year %in% c(1972, 2017)))
# Check the occurance of Zero values
colSums(flow_daily == 0)
# date flow_mean qcode_mean flow_min qcode_min flow_max qcode_max flow_year flow_month
# 0 560 0 783 0 557 0 0 0
# jday
# 0
# Flow duration curves ----------------------------------------------------
# FDC using all flows
flow_daily %>%
mutate(my_rank = 1- percent_rank(flow_mean)) %>%
mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
ggplot(aes(x = my_rank, y = flow_mean)) +
geom_line() +
scale_y_continuous(name = 'Mean daily flow (ML/d)',
trans = 'log10',
breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
minor_breaks = log10_minor_break(),
limits = c(0.01, 10000),
labels = c(0.01, 0.1, 1, 10, 100, 1000, '10,000')) +
scale_x_continuous(name = 'Percentage of time flow exceeded',
trans = 'probit',
limits = c(0.01, 0.99),
breaks = c(0.01, 0.1,0.25,0.5,0.75, 0.9, 0.99),
labels = c('1%', '10%', '25%', '50%', '75%', '90%', '99%'))
# FDC for each year of record (in grey)
p1 <- flow_daily %>%
group_by(flow_year) %>%
mutate(my_rank = 1-percent_rank(flow_mean)) %>%
mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
ggplot(aes(x = my_rank, y = flow_mean, group = flow_year)) +
geom_line(colour = 'grey70', size = 0.2)+
scale_y_continuous(name = 'Mean daily flow (ML/d)',
trans = 'log10',
breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000),
minor_breaks = log10_minor_break(),
limits = c(1, 10000),
labels = c(0.01, 0.1, 1, 10, 100, 1000, '10,000')) +
scale_x_continuous(name = 'Percentage of time flow exceeded',
trans = 'probit',
limits = c(0.01, 0.99),
breaks = c(0.01, 0.1,0.25,0.5,0.75, 0.9, 0.99),
labels = c('1%', '10%', '25%', '50%', '75%', '90%', '99%'))
p1
# with some years highlighted
year_highlights <- flow_daily %>%
filter(flow_year %in% c(1976, 1993, 1974)) %>%
group_by(flow_year) %>%
mutate(my_rank = 1- percent_rank(flow_mean)) %>%
mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank))
# FDC using all flows
fdc_all <- flow_daily %>%
mutate(my_rank = 1-percent_rank(flow_mean) ) %>%
mutate(my_rank = if_else(my_rank == 1, 0.9999, my_rank)) %>%
mutate(my_rank = if_else(my_rank == 0, 0.0001, my_rank)) %>%
select(my_rank, flow_mean)
fdc_all$flow_year <- 2000
# Overplot the selected years and the average for all years
p2 <- p1 +
geom_line(data = year_highlights, aes(x = my_rank, y = flow_mean, colour = factor(flow_year)), size = 0.4) +
scale_colour_discrete(name = NULL) +
theme(legend.position = 'bottom',
text = element_text(size = 8)) +
geom_line(data = fdc_all, aes(my_rank, flow_mean ), colour = 'black', size = 0.4)
p2
#ggsave("../figures/fdc_example.png", p2, width=4, height=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment