Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Created August 6, 2018 22:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/35ac7e4fe5801170c51a0a0976900ebb to your computer and use it in GitHub Desktop.
Save TonyLadson/35ac7e4fe5801170c51a0a0976900ebb to your computer and use it in GitHub Desktop.
Cut-and-stack plot to show a long series of flow data. See the blog at https://wordpress.com/post/tonyladson.wordpress.com/10393
######################################################################################
#
# Cut and Stack Plot
#
#######################################################################################
library(tidyverse)
library(lubridate)
library(gridExtra)
library(scales)
library(padr)
# Read in sample flow data
# Available on Google Drive at
# https://drive.google.com/open?id=1-g1rOfeOxquPow4YM1aifdEkdBboUzu4
# For advice on reading in files stored on Google Drive
# https://stackoverflow.com/questions/33135060/read-csv-file-hosted-on-google-drive
id <- "1-g1rOfeOxquPow4YM1aifdEkdBboUzu4" # google file ID
flow_daily <- read_csv(file = sprintf("https://docs.google.com/uc?id=%s&export=download", id),
col_types = cols(
X1 = col_character(),
X2 = col_double(),
X3 = col_integer(),
X4 = col_double(),
X5 = col_integer(),
X6 = col_double(),
X7 = col_integer()
),
col_names = FALSE,
skip = 17)
# On my computer
# project_path <- "/Users/anthonyladson/Dropbox/Grad Cert/2017/Unit 1/Hydrology/Casey's Weir/"
# flow_daily <- read_csv(file =str_c(project_path, '404216_20170628/404216.csv' ),
# col_types = cols(
# X1 = col_character(),
# X2 = col_double(),
# X3 = col_integer(),
# X4 = col_double(),
# X5 = col_integer(),
# X6 = col_double(),
# X7 = col_integer()
# ),
# col_names = FALSE,
# skip = 17
# )
names(flow_daily) <- c('date',
'flow_mean',
'qcode_mean',
'flow_min',
'qcode_min',
'flow_max',
'qcode_max')
# Convert date
flow_daily <- flow_daily %>%
mutate(date = as.Date(str_sub(date, 1, 10), format = '%d/%m/%Y')) # Convert time, just use days
#______________________________________________________________________________________
# Checks
# start and end of the file
head(flow_daily)
tail(flow_daily)
# Missing data
colSums(is.na(flow_daily))
# Remove first line (contains missing data and add columns we'll use later)
flow_daily <- flow_daily %>%
slice(2:nrow(flow_daily)) # There is only one missing value, which is the first, so lets remove it.
# Check time step
x <- flow_daily %>%
mutate(date_diff = date - lag(date)) %>%
count(date_diff) # ok
res <- assertthat::assert_that(x[1,2] == nrow(flow_daily)-1)
# # Look for incomplete years
# # This isn't relevant for cut and stack plots
#
# flow_daily %>%
# mutate(flow_year = year(date)) %>%
# group_by(flow_year) %>%
# summarise(days_measured = n()) %>%
# filter(!(days_measured %in% c(365,366))) # Years without 365 or 366 days
#
# # Delete 1972 and 2017 (first and last year)
#
# flow_daily <- flow_daily %>%
# filter(!(lubridate::year(date) %in% c(1972, 2017)))
# Zero values
colSums(dplyr::near(flow_daily[,-1], 0))
# looks fine
# date flow_mean qcode_mean flow_min qcode_min flow_max qcode_max flow_year flow_month
# 0 560 0 783 0 557 0 0 0
# jday
# 0
# plot all data
flow_daily %>%
ggplot(aes(date, flow_mean)) +
geom_line() +
scale_x_date(name = 'Date',
date_breaks = '2 years',
date_labels = '%Y',
limits = c(as.Date('1972-06-01'), as.Date('2017-7-31'))) + # These limits are a work around to so x-axis goes from 1972-2018
scale_y_continuous(name = 'Mean daily flow (ML/d)',
labels = comma) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Cut and stack
# ggplot version
# Uses ideas from 'Stacked Time Series in R'
# https://bscheng.com/2016/10/17/stacked-time-series-in-r/
# start and end dates
flow_daily %>%
dplyr::summarise(min(date), max(date))
# # A tibble: 1 x 2
# `min(flow_year)` `max(flow_year)`
# <dbl> <dbl>
# 1 1973 2016
# We will cut and stack by decades so
# Pad out to complete decades
flow_daily <- flow_daily %>% pad('day', start_val = as.Date('1970-01-01'))
flow_daily <- flow_daily %>% pad('day', end_val = as.Date('2019-12-31'))
# Add additional date information
flow_daily <- flow_daily %>%
mutate(flow_year = year(date)) %>%
mutate(flow_month = month(date)) %>%
mutate(jday = yday(date))
# Add decade variable
flow_daily <- flow_daily %>%
mutate(decade = case_when(
flow_year >= 1970 & flow_year <= 1979 ~ '1970-1979',
flow_year >= 1980 & flow_year <= 1989 ~ '1980-1989',
flow_year >= 1990 & flow_year <= 1999 ~ '1990-1999',
flow_year >= 2000 & flow_year <= 2009 ~ '2000-2009',
flow_year >= 2010 & flow_year <= 2019 ~ '2010-2019',
TRUE ~ NA_character_))
# We need to make the plots individually and stack them
# We need to fix the y-axis scale
# Use mean daily flow
#glimpse(flow_daily)
# check the max and min flow to set the scales
flow_daily %>%
dplyr::summarise(min(flow_mean, na.rm = TRUE), max(flow_mean, na.rm = TRUE))
# # A tibble: 1 x 2
# `min(flow_mean, na.rm = TRUE)` `max(flow_mean, na.rm = TRUE)`
# <dbl> <dbl>
# 1 0 71366.
upper_limit <- 72000
p1979_1979 <-
flow_daily %>%
filter(decade == '1970-1979') %>%
ggplot(aes(date, flow_mean)) + geom_line() +
scale_x_date(name = '',
date_breaks = "1 years",
date_labels = '%Y') +
scale_y_continuous(name = '',
labels = comma,
limits = c(0, upper_limit))
p1979_1979 # check graph
p1980_1989 <-
flow_daily %>%
filter(decade == '1980-1989') %>%
ggplot(aes(date, flow_mean)) + geom_line() +
scale_x_date(name = '',
date_breaks = "1 years",
date_labels = '%Y') +
scale_y_continuous(name = '',
labels = comma,
limits = c(0, upper_limit))
p1980_1989
p1990_1999 <-
flow_daily %>%
filter(decade == '1990-1999') %>%
ggplot(aes(date, flow_mean)) + geom_line() +
scale_x_date(name = '',
date_breaks = "1 years",
date_labels = '%Y') +
scale_y_continuous(name = 'Mean daily flow (ML/d)',
labels = comma,
limits = c(0, upper_limit))
p1990_1999
p2000_2009 <-
flow_daily %>%
filter(decade == '2000-2009') %>%
ggplot(aes(date, flow_mean)) + geom_line() +
scale_x_date(name = '',
date_breaks = "1 years",
date_labels = '%Y') +
scale_y_continuous(name = '',
labels = comma,
limits = c(0, upper_limit))
p2000_2009
p2010_2019 <-
flow_daily %>%
filter(decade == '2010-2019') %>%
ggplot(aes(date, flow_mean)) + geom_line() +
scale_x_date(name = '',
date_breaks = "1 years",
date_labels = '%Y') +
scale_y_continuous(name = '',
labels = comma,
limits = c(0, upper_limit))
p2010_2019
# Stick together
gridExtra::grid.arrange(p1979_1979, p1980_1989, p1990_1999, p2000_2009, p2010_2019, ncol = 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment