Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active September 8, 2016 01:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/843410a1f9e1f1bbf1f48eeeee79a007 to your computer and use it in GitHub Desktop.
Save TonyLadson/843410a1f9e1f1bbf1f48eeeee79a007 to your computer and use it in GitHub Desktop.
# Hourly data from Victorian monitoring site
# This script takes the standard download of hourly max, min, mean data from
# http://data.water.vic.gov.au/monitoring.htm
# File is downloaded as 'gaugenumber.csv'
#remove(list = objects())
library(readr)
library(stringr)
library(dplyr)
library(lubridate)
library(ggplot2)
library(scales)
library(ggrepel)
library(RcppRoll)
#________________________________________________________
#
# Helper functions
#
#
# Shadow shift
# Courtesy Nicholas Tierney
# https://github.com/njtierney/ggmissing/blob/master/R/shadow_shift.R
shadow_shift <- function(x){
xrange <- max(x, na.rm = TRUE) - min(x, na.rm = TRUE)
xmin <- min(x, na.rm = TRUE)
# create the "jitter" to be added around the points.
xrunif <- (runif(length(x))-0.5)*xrange*0.05
ifelse(is.na(x),
# add the jitter around the those values that are missing
yes = xmin-xrange*0.1 + xrunif,
no = x)
} # close function
#
Value_atMax <- function(vec_1, vec_2) {
# get the value of vec_1 at the position of the maximum of vec_2
# this handles the case where vec_2 is all missing
if(all(is.na(vec_2))) return(NA)
vec_1[which.max(vec_2)]
}
# Helper function to count missing
N_missing <- function(x){ # sum number of missing values
sum(is.na(x))
}
#__________________________________________________________________________________________
# Download the hour data from
# http://data.water.vic.gov.au/monitoring.htm
# or a specific site
# http://data.water.vic.gov.au/monitoring.htm?ppbm=222200&rs&1&rsdf_org
# path to the file
# my.path <- 'path to file'
# my.file <- 'name of file
fname <- str_c(my.path, my.file, sep = '/')
# Read in the data
# 1. Find where the data starts by searching for the line that begins with 'Datetime'
heading.line <- which(str_detect(read_lines(fname, n_max = 50), 'Datetime')) # searching the first 50 lines
if(!any(heading.line)) stop('Heading not found') # trap error if heading not found
flow <- read_csv(fname, skip = heading.line - 1, n_max = Inf)
# check if there are any problems reading in the data
# Often, the final row is read in incorrectly
my.problems <- problems(flow)
n.problems <- nrow(my.problems)
if(n.problems > 2) stop('Problems reading in flow file (not just the final row)')
if(n.problems == 1) { # try to fix the problem, its likely the final row has been misread and can be removed
if(my.problems$row == nrow(flow)) { # is there a problem with the final row
flow <- flow[-nrow(flow), ] # remove final row
} else {
stop('Problems reading in flow file (single problem, not the final row)') # otherwise stop
}
}
# name columns
names(flow) <- c('date.time', 'flow.mean', 'flow.mean.qc', 'flow.min', 'flow.min.qc', 'flow.max', 'flow.max.qc')
# parse dates
flow <- flow %>%
mutate(date.time = parse_date_time(date.time, orders = 'dmy H:M:S'))
flow.annual <- flow %>%
mutate(flow.max = flow.max) %>%
mutate(flow.year = year(date.time)) %>%
mutate(flow.month = month(date.time))%>%
group_by(flow.year) %>%
summarise(max.date = Value_atMax(date.time, flow.max),
max.flow = max(flow.max, na.rm = FALSE),
max.month = Value_atMax(flow.month, flow.max),
n.days = n()/(24),
n.missing = N_missing(flow.max),
pc.missing = 100*n.missing/n())
View(flow.annual)
# flow data frame is now ready for further analysis
# Flow example
# 1. Plot annual peaks
flow.annual %>%
mutate(max.flow.shifted = shadow_shift(max.flow)) %>%
ggplot(aes(x = flow.year, y = max.flow.shifted, colour = is.na(max.flow))) + geom_point() +
xlab('Year') +
# scale_y_continuous(bquote('Peak discharge (m' ^3 *'s' ^-1 *')'), labels = comma) +
scale_y_continuous(name = 'Peak flow', labels = comma) +
scale_colour_discrete(name = 'Missing') +
theme_bw() +
theme(
panel.background = element_rect(fill="gray98"),
axis.title.x = element_text(colour="grey20", size=14, margin=margin(20,0,0,0)),
axis.text.x = element_text(colour="grey20",size=12),
axis.title.y = element_text(colour="grey20",size=14, margin = margin(0,20,0,0)),
axis.text.y = element_text(colour="grey20",size=14),
legend.title = element_text(colour="grey20",size=12),
plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm")) # top, right, bottom, left
# Time between peaks
# if this is too short the peaks are not independent
flow.annual %>%
mutate(time_diff =difftime(max.date, lag(max.date))) %>%
arrange(time_diff) %>%
View
# 2. Time of year polar plot
Date_to_Angle <- function(my.date){
start.year <- floor_date(my.date, 'year')
end.year <- ceiling_date(my.date, 'year')
length.year <- as.numeric(difftime(end.year, start.year, units = 'sec'))
time.since.year.start <- as.numeric(difftime(my.date, start.year, units = 'sec'))
2 * pi * time.since.year.start/length.year # date as angle
}
# Angles for month start
month.start <- yday(seq(as.Date('2010-01-01'),as.Date('2010-12-31'), by='1 months'))
month.start.radians <- 2*pi*month.start/365.25
# Labels for the largest n points
labeled.dat <- flow.annual %>%
mutate(angle = Date_to_Angle(max.date)) %>%
arrange(desc(max.flow)) %>%
slice(1:5)
flow.annual %>%
mutate(angle = Date_to_Angle(max.date)) %>%
ggplot(aes(x=angle, y=max.flow)) +
coord_polar('x', start = 0, direction = 1) +
geom_segment(aes(y = 0, xend = angle, yend = max.flow)) +
scale_x_continuous(name = 'Floods - time of year', breaks = month.start.radians, labels = month.name, limits = c(0, 2*pi)) +
scale_y_continuous(name = 'Peak flow', labels = comma) +
#scale_y_continuous(name = bquote('Peak discharge (m' ^3 *'s' ^-1 *')')) +
geom_text_repel(data = labeled.dat, aes(x = angle, y = max.flow + 100000, label = as.character(flow.year)), segment.size = 0) +
theme_bw() +
theme(
#panel.background = element_rect(fill="gray99"),
axis.title.x = element_text(colour="grey20", size=20, margin=margin(20,0,0,0)),
axis.text.x = element_text(colour="grey20",size=10),
axis.title.y = element_text(colour="grey20",size=20, margin = margin(0,20,0,0)),
axis.text.y = element_text(colour="grey20",size=14),
legend.title = element_text(colour="grey20",size=12),
plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm")) # top, right, bottom, left
# 3. Annual maximum three day volumes
# Use a rolling 72 hour mean
# Get max for each year
flow %>%
mutate(vol_3day = roll_mean(flow.mean, n = 72, fill = NA, na.rm = TRUE )) %>%
mutate(flow.year = year(date.time)) %>%
group_by(flow.year) %>%
summarise(vol.max = 3 * max(vol_3day)) %>%
View
# when does the maximum volume occur
flow %>%
mutate(vol_3day = roll_mean(flow.mean, n = 72, fill = NA, na.rm = TRUE )) %>%
mutate(flow.year = year(date.time)) %>%
group_by(flow.year) %>%
summarise(vol.max = 3 * max(vol_3day, na.rm = TRUE), vol.max.date = Value_atMax(date.time, vol_3day)) %>%
View
# 4. Empirical frequency curve
# Cunnane plotting position is recommended in Australian Rainfall and Runoff
pp_cunnane <- function(x){
n = length(x)
rank(x)/(n + 0.2)
}
my_AEP <- c(0.5, 1,2,5,10, 20, 50)
my_labels <- str_c(my_AEP, '%')
my_breaks <- 1 - AEP/100
flow.annual %>%
filter(!is.na(max.flow)) %>%
mutate(AEP = pp_cunnane(max.flow)) %>%
ggplot(aes(x = AEP, y = max.flow/86.4)) + geom_point() +
scale_y_log10(name = 'Peak flow',
breaks = c(1e+1, 1e+2, 1e+3, 1e+4),
limits = c(10, 1e+4),
labels = comma) +
scale_x_continuous(trans = "probit", breaks = my_breaks , labels = my_labels )+
theme_gray(base_size = 16)
# 5. Plots for indiviual years or time periods
# start_time <- ymd_hms('1971-02-06 17:00:00')
flow %>%
filter(date.time > ymd('1971-01-01', tz = 'UTC') & date.time < ymd('1971-02-20', tz = 'UTC') ) %>%
mutate(flow.max.shifted = shadow_shift(flow.max)) %>%
ggplot(aes(x = date.time, y = flow.max.shifted/86.4, colour = is.na(flow.max))) + geom_point() +
xlab('Year') +
# scale_y_continuous(bquote('Peak discharge (m' ^3 *'s' ^-1 *')'), labels = comma) +
scale_y_continuous(name = 'Peak flow', labels = comma) +
scale_colour_discrete(name = 'Missing') +
theme_bw() +
theme(
panel.background = element_rect(fill="gray98"),
axis.title.x = element_text(colour="grey20", size=14, margin=margin(20,0,0,0)),
axis.text.x = element_text(colour="grey20",size=12),
axis.title.y = element_text(colour="grey20",size=14, margin = margin(0,20,0,0)),
axis.text.y = element_text(colour="grey20",size=14),
legend.title = element_text(colour="grey20",size=12),
plot.margin = unit(c(0.5, 0.5, 1, 0.5), "cm")) # top, right, bottom, left
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment