# TonyLadson/facet.R

## facet.R
######################################################################################
#
# Facet plot for flows
#
#######################################################################################

library(tidyverse)
library(lubridate)
library(gridExtra)
library(scales)
library(padr)


# Read in sample flow data
# Available on Google Drive at
# https://drive.google.com/open?id=1-g1rOfeOxquPow4YM1aifdEkdBboUzu4

# For advice on reading in files stored on Google Drive
# https://stackoverflow.com/questions/33135060/read-csv-file-hosted-on-google-drive

# Google Drive file ID of the sample flow record
id <- "1-g1rOfeOxquPow4YM1aifdEkdBboUzu4"

# Download the CSV directly from Google Drive.
# The first 17 lines of the file are skipped and no header row is read, so the
# columns are referred to by the default names X1..X7 in the specification:
# X1 is kept as text (it is converted to a Date further down), followed by
# three (double, integer) column pairs.
flow_daily <- read_csv(
  file = sprintf("https://docs.google.com/uc?id=%s&export=download", id),
  skip = 17,
  col_names = FALSE,
  col_types = cols(
    X1 = col_character(),
    X2 = col_double(),
    X3 = col_integer(),
    X4 = col_double(),
    X5 = col_integer(),
    X6 = col_double(),
    X7 = col_integer()
  )
)


# On my computer
# project_path <- "/Users/anthonyladson/Dropbox/Grad Cert/2017/Unit 1/Hydrology/Casey's Weir/"
# flow_daily <- read_csv(file =str_c(project_path, '404216_20170628/404216.csv' ),
#                        col_types = cols(
#                          X1 = col_character(),
#                          X2 = col_double(),
#                          X3 = col_integer(),
#                          X4 = col_double(),
#                          X5 = col_integer(),
#                          X6 = col_double(),
#                          X7 = col_integer()
#                        ),
#                        col_names = FALSE,
#                        skip = 17
# )

# Give the seven columns descriptive names (a flow value followed by its
# quality code, for the daily mean, minimum and maximum), then convert the
# text time stamp to a Date. Only the first 10 characters (dd/mm/yyyy) are
# used, so anything after the day is discarded.
flow_daily <- flow_daily %>%
  setNames(c('date',
             'flow_mean', 'qcode_mean',
             'flow_min', 'qcode_min',
             'flow_max', 'qcode_max')) %>%
  mutate(date = as.Date(str_sub(date, 1, 10), format = '%d/%m/%Y'))


#______________________________________________________________________________________
# Checks


# Eyeball the first and the last few records of the series
flow_daily %>% head()
flow_daily %>% tail()


# Missing data: number of NA entries in each column
flow_daily %>%
  is.na() %>%
  colSums()

# Remove the first record. According to the missing-data check above there is
# only one missing value and it sits in the first row.
#
# A negative index is used rather than 2:nrow(flow_daily): that range counts
# backwards (2, 1) when the table has a single row and would then keep the
# very row it is supposed to drop.

flow_daily <- flow_daily %>%
  slice(-1)


# Check time step

# Tabulate the gap between consecutive dates. count() returns its groups in
# sorted order with NA last, so row 1 holds the smallest gap; the NA produced
# by lag() for the first record ends up in the final row.
x <- flow_daily %>%
  mutate(date_diff = date - lag(date)) %>%
  count(date_diff)

# The series is a gap-free daily record only if
#   * the smallest gap is exactly one day, and
#   * that gap accounts for every record except the first.
# The earlier version tested only the count (and compared a 1x1 tibble rather
# than a scalar), so a constant step of any length would have passed.
res <- assertthat::assert_that(
  x$date_diff[1] == 1,
  x$n[1] == nrow(flow_daily) - 1
)


# Zero values

# For every column except the first (date), count the entries that are
# numerically equal to zero.
flow_daily %>%
  select(-1) %>%
  dplyr::near(0) %>%
  colSums()
# looks fine
# NOTE(review): the output pasted below lists date and the flow_year /
# flow_month / jday columns, none of which this call sees at this point in the
# script - presumably it was captured from a session run in a different order.
# date  flow_mean qcode_mean   flow_min  qcode_min   flow_max  qcode_max  flow_year flow_month
# 0        560          0        783          0        557          0          0          0
# jday
# 0


# plot
# Line plot of mean daily flow against date for the whole record
ggplot(flow_daily, aes(x = date, y = flow_mean)) +
  geom_line() +
  scale_y_continuous(name = 'Mean daily flow (ML/d)', labels = comma) +
  # The explicit limits are a workaround so that the x-axis runs from 1972 to 2018
  scale_x_date(
    name = 'Date',
    limits = c(as.Date('1972-06-01'), as.Date('2017-7-31')),
    date_breaks = '2 years',
    date_labels = '%Y'
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Facet plot

# Derive the calendar year, the month number and the day of the year from
# each date; the facet plots below use flow_year and jday.
flow_daily <- flow_daily %>%
  mutate(
    flow_year = year(date),
    flow_month = month(date),
    jday = yday(date)
  )


# Axis breaks for the day-of-year axis: the day number on which January,
# April, July and October begin, computed for 2018 (not a leap year).
month_starts <- yday(as.Date('2018-01-01') %m+% months(seq(0, 9, by = 3)))
month_starts
# Matching tick labels: the abbreviated names of months 1, 4, 7 and 10
month_labs <- month.abb[seq(1, 10, by = 3)]
month_labs

# with constant y-axis scaling
# One panel per year, five panels per row; every panel shares the same axes.

ggplot(flow_daily, aes(x = jday, y = flow_mean)) +
  geom_line() +
  facet_wrap(~flow_year, ncol = 5, scales = 'fixed') +
  scale_x_continuous(name = 'Date', breaks = month_starts, labels = month_labs) +
  scale_y_continuous(name = 'Mean daily flow (ML/d)', labels = comma)


# with variable y-axis scaling
# Same layout as the fixed-scale version, but each year's panel gets its own
# y-axis range.

ggplot(flow_daily, aes(x = jday, y = flow_mean)) +
  geom_line() +
  facet_wrap(~flow_year, ncol = 5, scales = 'free_y') +
  scale_x_continuous(name = 'Date', breaks = month_starts, labels = month_labs) +
  scale_y_continuous(name = 'Mean daily flow (ML/d)', labels = comma)
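	######################################################################################
	#
	# Facet plot for flows
	#
	# NOTE(review): from here to the end of the file the script above appears to
	# be repeated (tab-indented). Sourcing the whole file therefore downloads
	# the data and draws every plot a second time.
	#
	#######################################################################################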

	library(tidyverse)
	library(lubridate)
	library(gridExtra)
	library(scales)
	library(padr)


	# Read in sample flow data
	# Available on Google Drive at
	# https://drive.google.com/open?id=1-g1rOfeOxquPow4YM1aifdEkdBboUzu4

	# For advice on reading in files stored on Google Drive
	# https://stackoverflow.com/questions/33135060/read-csv-file-hosted-on-google-drive

	# Google Drive file ID of the sample flow record
	id <- "1-g1rOfeOxquPow4YM1aifdEkdBboUzu4"

	# Download the CSV directly from Google Drive.
	# The first 17 lines of the file are skipped and no header row is read, so the
	# columns are referred to by the default names X1..X7 in the specification:
	# X1 is kept as text (it is converted to a Date further down), followed by
	# three (double, integer) column pairs.
	flow_daily <- read_csv(
	  file = sprintf("https://docs.google.com/uc?id=%s&export=download", id),
	  skip = 17,
	  col_names = FALSE,
	  col_types = cols(
	    X1 = col_character(),
	    X2 = col_double(),
	    X3 = col_integer(),
	    X4 = col_double(),
	    X5 = col_integer(),
	    X6 = col_double(),
	    X7 = col_integer()
	  )
	)


	# On my computer
	# project_path <- "/Users/anthonyladson/Dropbox/Grad Cert/2017/Unit 1/Hydrology/Casey's Weir/"
	# flow_daily <- read_csv(file =str_c(project_path, '404216_20170628/404216.csv' ),
	# col_types = cols(
	# X1 = col_character(),
	# X2 = col_double(),
	# X3 = col_integer(),
	# X4 = col_double(),
	# X5 = col_integer(),
	# X6 = col_double(),
	# X7 = col_integer()
	# ),
	# col_names = FALSE,
	# skip = 17
	# )

	# Give the seven columns descriptive names (a flow value followed by its
	# quality code, for the daily mean, minimum and maximum), then convert the
	# text time stamp to a Date. Only the first 10 characters (dd/mm/yyyy) are
	# used, so anything after the day is discarded.
	flow_daily <- flow_daily %>%
	  setNames(c('date',
	             'flow_mean', 'qcode_mean',
	             'flow_min', 'qcode_min',
	             'flow_max', 'qcode_max')) %>%
	  mutate(date = as.Date(str_sub(date, 1, 10), format = '%d/%m/%Y'))


	#______________________________________________________________________________________
	# Checks


	# Eyeball the first and the last few records of the series
	flow_daily %>% head()
	flow_daily %>% tail()


	# Missing data: number of NA entries in each column
	flow_daily %>%
	  is.na() %>%
	  colSums()

	# Remove the first record. According to the missing-data check above there is
	# only one missing value and it sits in the first row.
	#
	# A negative index is used rather than 2:nrow(flow_daily): that range counts
	# backwards (2, 1) when the table has a single row and would then keep the
	# very row it is supposed to drop.

	flow_daily <- flow_daily %>%
	  slice(-1)


	# Check time step

	# Tabulate the gap between consecutive dates. count() returns its groups in
	# sorted order with NA last, so row 1 holds the smallest gap; the NA produced
	# by lag() for the first record ends up in the final row.
	x <- flow_daily %>%
	  mutate(date_diff = date - lag(date)) %>%
	  count(date_diff)

	# The series is a gap-free daily record only if
	#   * the smallest gap is exactly one day, and
	#   * that gap accounts for every record except the first.
	# The earlier version tested only the count (and compared a 1x1 tibble rather
	# than a scalar), so a constant step of any length would have passed.
	res <- assertthat::assert_that(
	  x$date_diff[1] == 1,
	  x$n[1] == nrow(flow_daily) - 1
	)



	# Zero values

	# For every column except the first (date), count the entries that are
	# numerically equal to zero.
	flow_daily %>%
	  select(-1) %>%
	  dplyr::near(0) %>%
	  colSums()
	# looks fine
	# NOTE(review): the output pasted below lists date and the flow_year /
	# flow_month / jday columns, none of which this call sees at this point in the
	# script - presumably it was captured from a session run in a different order.
	# date flow_mean qcode_mean flow_min qcode_min flow_max qcode_max flow_year flow_month
	# 0 560 0 783 0 557 0 0 0
	# jday
	# 0


	# plot
	# Line plot of mean daily flow against date for the whole record
	ggplot(flow_daily, aes(x = date, y = flow_mean)) +
	  geom_line() +
	  scale_y_continuous(name = 'Mean daily flow (ML/d)', labels = comma) +
	  # The explicit limits are a workaround so that the x-axis runs from 1972 to 2018
	  scale_x_date(
	    name = 'Date',
	    limits = c(as.Date('1972-06-01'), as.Date('2017-7-31')),
	    date_breaks = '2 years',
	    date_labels = '%Y'
	  ) +
	  theme(axis.text.x = element_text(angle = 45, hjust = 1))


	# Facet plot

	# Derive the calendar year, the month number and the day of the year from
	# each date; the facet plots below use flow_year and jday.
	flow_daily <- flow_daily %>%
	  mutate(
	    flow_year = year(date),
	    flow_month = month(date),
	    jday = yday(date)
	  )



	# Axis breaks for the day-of-year axis: the day number on which January,
	# April, July and October begin, computed for 2018 (not a leap year).
	month_starts <- yday(as.Date('2018-01-01') %m+% months(seq(0, 9, by = 3)))
	month_starts
	# Matching tick labels: the abbreviated names of months 1, 4, 7 and 10
	month_labs <- month.abb[seq(1, 10, by = 3)]
	month_labs

	# with constant y-axis scaling
	# One panel per year, five panels per row; every panel shares the same axes.

	ggplot(flow_daily, aes(x = jday, y = flow_mean)) +
	  geom_line() +
	  facet_wrap(~flow_year, ncol = 5, scales = 'fixed') +
	  scale_x_continuous(name = 'Date', breaks = month_starts, labels = month_labs) +
	  scale_y_continuous(name = 'Mean daily flow (ML/d)', labels = comma)


	# with variable y-axis scaling
	# Same layout as the fixed-scale version, but each year's panel gets its own
	# y-axis range.

	ggplot(flow_daily, aes(x = jday, y = flow_mean)) +
	  geom_line() +
	  facet_wrap(~flow_year, ncol = 5, scales = 'free_y') +
	  scale_x_continuous(name = 'Date', breaks = month_starts, labels = month_labs) +
	  scale_y_continuous(name = 'Mean daily flow (ML/d)', labels = comma)