Created
August 6, 2018 22:37
-
-
Save TonyLadson/35ac7e4fe5801170c51a0a0976900ebb to your computer and use it in GitHub Desktop.
Cut-and-stack plot to show a long series of flow data. See the blog at https://wordpress.com/post/tonyladson.wordpress.com/10393
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################################################################### | |
# | |
# Cut and Stack Plot | |
# | |
####################################################################################### | |
library(tidyverse) | |
library(lubridate) | |
library(gridExtra) | |
library(scales) | |
library(padr) | |
# Read in sample flow data | |
# Available on Google Drive at | |
# https://drive.google.com/open?id=1-g1rOfeOxquPow4YM1aifdEkdBboUzu4 | |
# For advice on reading in files stored on Google Drive | |
# https://stackoverflow.com/questions/33135060/read-csv-file-hosted-on-google-drive | |
id <- "1-g1rOfeOxquPow4YM1aifdEkdBboUzu4" # google file ID | |
flow_daily <- read_csv(file = sprintf("https://docs.google.com/uc?id=%s&export=download", id), | |
col_types = cols( | |
X1 = col_character(), | |
X2 = col_double(), | |
X3 = col_integer(), | |
X4 = col_double(), | |
X5 = col_integer(), | |
X6 = col_double(), | |
X7 = col_integer() | |
), | |
col_names = FALSE, | |
skip = 17) | |
# On my computer | |
# project_path <- "/Users/anthonyladson/Dropbox/Grad Cert/2017/Unit 1/Hydrology/Casey's Weir/" | |
# flow_daily <- read_csv(file =str_c(project_path, '404216_20170628/404216.csv' ), | |
# col_types = cols( | |
# X1 = col_character(), | |
# X2 = col_double(), | |
# X3 = col_integer(), | |
# X4 = col_double(), | |
# X5 = col_integer(), | |
# X6 = col_double(), | |
# X7 = col_integer() | |
# ), | |
# col_names = FALSE, | |
# skip = 17 | |
# ) | |
names(flow_daily) <- c('date', | |
'flow_mean', | |
'qcode_mean', | |
'flow_min', | |
'qcode_min', | |
'flow_max', | |
'qcode_max') | |
# Convert date | |
flow_daily <- flow_daily %>% | |
mutate(date = as.Date(str_sub(date, 1, 10), format = '%d/%m/%Y')) # Convert time, just use days | |
#______________________________________________________________________________________ | |
# Checks | |
# start and end of the file | |
head(flow_daily) | |
tail(flow_daily) | |
# Missing data | |
colSums(is.na(flow_daily)) | |
# Remove first line (contains missing data and add columns we'll use later) | |
flow_daily <- flow_daily %>% | |
slice(2:nrow(flow_daily)) # There is only one missing value, which is the first, so lets remove it. | |
# Check time step | |
x <- flow_daily %>% | |
mutate(date_diff = date - lag(date)) %>% | |
count(date_diff) # ok | |
res <- assertthat::assert_that(x[1,2] == nrow(flow_daily)-1) | |
# # Look for incomplete years | |
# # This isn't relevant for cut and stack plots | |
# | |
# flow_daily %>% | |
# mutate(flow_year = year(date)) %>% | |
# group_by(flow_year) %>% | |
# summarise(days_measured = n()) %>% | |
# filter(!(days_measured %in% c(365,366))) # Years without 365 or 366 days | |
# | |
# # Delete 1972 and 2017 (first and last year) | |
# | |
# flow_daily <- flow_daily %>% | |
# filter(!(lubridate::year(date) %in% c(1972, 2017))) | |
# Zero values | |
colSums(dplyr::near(flow_daily[,-1], 0)) | |
# looks fine | |
# date flow_mean qcode_mean flow_min qcode_min flow_max qcode_max flow_year flow_month | |
# 0 560 0 783 0 557 0 0 0 | |
# jday | |
# 0 | |
# plot all data | |
flow_daily %>% | |
ggplot(aes(date, flow_mean)) + | |
geom_line() + | |
scale_x_date(name = 'Date', | |
date_breaks = '2 years', | |
date_labels = '%Y', | |
limits = c(as.Date('1972-06-01'), as.Date('2017-7-31'))) + # These limits are a work around to so x-axis goes from 1972-2018 | |
scale_y_continuous(name = 'Mean daily flow (ML/d)', | |
labels = comma) + | |
theme(axis.text.x = element_text(angle = 45, hjust = 1)) | |
## Cut and stack | |
# ggplot version | |
# Uses ideas from 'Stacked Time Series in R' | |
# https://bscheng.com/2016/10/17/stacked-time-series-in-r/ | |
# start and end dates | |
flow_daily %>% | |
dplyr::summarise(min(date), max(date)) | |
# # A tibble: 1 x 2 | |
# `min(flow_year)` `max(flow_year)` | |
# <dbl> <dbl> | |
# 1 1973 2016 | |
# We will cut and stack by decades so | |
# Pad out to complete decades | |
flow_daily <- flow_daily %>% pad('day', start_val = as.Date('1970-01-01')) | |
flow_daily <- flow_daily %>% pad('day', end_val = as.Date('2019-12-31')) | |
# Add additional date information | |
flow_daily <- flow_daily %>% | |
mutate(flow_year = year(date)) %>% | |
mutate(flow_month = month(date)) %>% | |
mutate(jday = yday(date)) | |
# Add decade variable | |
flow_daily <- flow_daily %>% | |
mutate(decade = case_when( | |
flow_year >= 1970 & flow_year <= 1979 ~ '1970-1979', | |
flow_year >= 1980 & flow_year <= 1989 ~ '1980-1989', | |
flow_year >= 1990 & flow_year <= 1999 ~ '1990-1999', | |
flow_year >= 2000 & flow_year <= 2009 ~ '2000-2009', | |
flow_year >= 2010 & flow_year <= 2019 ~ '2010-2019', | |
TRUE ~ NA_character_)) | |
# We need to make the plots individually and stack them | |
# We need to fix the y-axis scale | |
# Use mean daily flow | |
#glimpse(flow_daily) | |
# check the max and min flow to set the scales | |
flow_daily %>% | |
dplyr::summarise(min(flow_mean, na.rm = TRUE), max(flow_mean, na.rm = TRUE)) | |
# # A tibble: 1 x 2 | |
# `min(flow_mean, na.rm = TRUE)` `max(flow_mean, na.rm = TRUE)` | |
# <dbl> <dbl> | |
# 1 0 71366. | |
upper_limit <- 72000 | |
p1979_1979 <- | |
flow_daily %>% | |
filter(decade == '1970-1979') %>% | |
ggplot(aes(date, flow_mean)) + geom_line() + | |
scale_x_date(name = '', | |
date_breaks = "1 years", | |
date_labels = '%Y') + | |
scale_y_continuous(name = '', | |
labels = comma, | |
limits = c(0, upper_limit)) | |
p1979_1979 # check graph | |
p1980_1989 <- | |
flow_daily %>% | |
filter(decade == '1980-1989') %>% | |
ggplot(aes(date, flow_mean)) + geom_line() + | |
scale_x_date(name = '', | |
date_breaks = "1 years", | |
date_labels = '%Y') + | |
scale_y_continuous(name = '', | |
labels = comma, | |
limits = c(0, upper_limit)) | |
p1980_1989 | |
p1990_1999 <- | |
flow_daily %>% | |
filter(decade == '1990-1999') %>% | |
ggplot(aes(date, flow_mean)) + geom_line() + | |
scale_x_date(name = '', | |
date_breaks = "1 years", | |
date_labels = '%Y') + | |
scale_y_continuous(name = 'Mean daily flow (ML/d)', | |
labels = comma, | |
limits = c(0, upper_limit)) | |
p1990_1999 | |
p2000_2009 <- | |
flow_daily %>% | |
filter(decade == '2000-2009') %>% | |
ggplot(aes(date, flow_mean)) + geom_line() + | |
scale_x_date(name = '', | |
date_breaks = "1 years", | |
date_labels = '%Y') + | |
scale_y_continuous(name = '', | |
labels = comma, | |
limits = c(0, upper_limit)) | |
p2000_2009 | |
p2010_2019 <- | |
flow_daily %>% | |
filter(decade == '2010-2019') %>% | |
ggplot(aes(date, flow_mean)) + geom_line() + | |
scale_x_date(name = '', | |
date_breaks = "1 years", | |
date_labels = '%Y') + | |
scale_y_continuous(name = '', | |
labels = comma, | |
limits = c(0, upper_limit)) | |
p2010_2019 | |
# Stick together | |
gridExtra::grid.arrange(p1979_1979, p1980_1989, p1990_1999, p2000_2009, p2010_2019, ncol = 1) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment