Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Last active June 11, 2019 00:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/28132f84030c66855218e278a1cb1c60 to your computer and use it in GitHub Desktop.
Save TonyLadson/28132f84030c66855218e278a1cb1c60 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(stringr)
library(dplyr)
library(readxl)
library(readr)
library(purrr)
# Functions ---------------------------------------------------------------
# not in
"%ni%" <- function(x, y) x[!x %in% y]
# function to plot logarithmic minor breaks
# from https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot
log_breaks = function(maj, radix=10) {
function(x) {
minx = floor(min(logb(x,radix), na.rm=T)) - 1
maxx = ceiling(max(logb(x,radix), na.rm=T)) + 1
n_major = maxx - minx + 1
major_breaks = seq(minx, maxx, by=1)
if (maj) {
breaks = major_breaks
} else {
steps = logb(1:(radix-1),radix)
breaks = rep(steps, times=n_major) +
rep(major_breaks, each=radix-1)
}
radix^breaks
}
}
scale_x_log_eng = function(..., radix=10) {
scale_x_continuous(...,
trans=scales::log_trans(radix),
breaks=log_breaks(TRUE, radix),
minor_breaks=log_breaks(FALSE, radix))
}
scale_y_log_eng = function(..., radix=10) {
scale_y_continuous(...,
trans=scales::log_trans(radix),
breaks=log_breaks(TRUE, radix),
minor_breaks=log_breaks(FALSE, radix))
}
# Load Victorian data ------------------------------------------------------
# Data are available at
# https://drive.google.com/file/d/1UfRW0ua_dU658gUyBIdAtpoOOMtQdH6x/view?usp=sharing
fname <- file.path('../data', "vic_stations.csv")
vic_gauges <- read_csv(fname,
col_types = cols(
.default = col_double(),
station_name = col_character(),
state = col_character(),
region.name = col_character()
))
glimpse(vic_gauges)
# Load RFFE outputs -------------------------------------------------------
# Data are available at
#https://drive.google.com/file/d/12gc_4SAEPX8h3AnEaXbRlcj2oKdKpJRA/view?usp=sharing
fname = file.path('../data', "RFFE_output_at_stations.csv" )
RFFE_outputs <- read_csv(file = file.path('../data', "RFFE_output_at_stations.csv" ))
glimpse(RFFE_outputs)
# Compare -----------------------------------------------------------------
# Add means, differences and ratios to data frame
vic_comp <- vic_gauges %>%
select(station_id, station_name, area, flow_1pc) %>%
left_join(RFFE_outputs, by = 'station_id') %>%
select(station_id,
station_name.x,
area,
flow_1pc.x,
flow_1pc.y) %>%
rename(RFFE_1pc_in = flow_1pc.x,
RFFE_1pc_out = flow_1pc.y) %>%
mutate(diff_log = log10(RFFE_1pc_in) - log10(RFFE_1pc_out)) %>%
mutate(mean_log = log10(0.5*(RFFE_1pc_in + RFFE_1pc_out ))) %>%
mutate(ratio_1pc = RFFE_1pc_in/RFFE_1pc_out) %>%
mutate(ratio_1pc_log = log10(RFFE_1pc_in/RFFE_1pc_out)) %>%
mutate(diff = RFFE_1pc_in - RFFE_1pc_out) %>%
mutate(mean_1pc = 0.5*(RFFE_1pc_in + RFFE_1pc_out ))
# Scatter plot
vic_comp %>%
ggplot(aes(x = RFFE_1pc_out ,y = RFFE_1pc_in )) +
geom_point() +
scale_x_log_eng(name = bquote('1% AEP from RFFE input data (m' ^3 *'s' ^-1 *')'),
labels = scales::comma,
limits = c(1,20000))+
scale_y_log_eng(name = bquote('1% AEP estimate from RFFE ouput (m' ^3 *'s' ^-1 *')'),
labels = scales::comma,
limits = c(1,20000)) +
geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
geom_smooth(se = FALSE)
# Web version
my_size = 0.1
my_text_size = 2
p <- vic_comp %>%
ggplot(aes(x = RFFE_1pc_out ,y = RFFE_1pc_in )) +
geom_point(size = my_size) +
scale_x_log_eng(name = bquote('1% AEP from RFFE input data (m' ^3 *'s' ^-1 *')'),
labels = scales::comma,
limits = c(1,20000))+
scale_y_log_eng(name = bquote('1% AEP estimate from RFFE ouput (m' ^3 *'s' ^-1 *')'),
labels = scales::comma,
limits = c(1,20000)) +
geom_abline(slope = 1, intercept = 0, linetype = 'dashed', size = my_size) +
geom_smooth(se = FALSE, size = my_size) +
theme_gray(base_size = 5)
p
ggsave(file.path('../figures', 'scatterplot_vic.png'), p, width = 4, height = 3)
ratio_stats_log <-
vic_comp %>%
#filter(! station_id %in% c(210084, 212040)) %>%
summarise(mean_difference_log = mean(ratio_1pc_log),
sd_diff_log = sd(ratio_1pc_log)) %>%
unlist()
ratio_stats_log
# mean_difference_log sd_diff_log
# 0.04094854 0.34756643
10^ratio_stats_log["mean_difference_log"]
# mean_difference_log
# 1.098876
ULA <- as.vector(ratio_stats_log["mean_difference_log"] + 2* ratio_stats_log["sd_diff_log"]) # Upper Limit
ULA
# [1] 0.7360814
10^ULA
#[1] 5.446047
LLA <- as.vector(ratio_stats_log["mean_difference_log"] - 2* ratio_stats_log["sd_diff_log"]) # Upper Limit
LLA
#[1] -0.6541843
10^LLA
# [1] 0.2217255
# make a data frame for labels
vic_comp_label<-
vic_comp %>%
filter(ratio_1pc > 7 | ratio_1pc <0.22)
vic_comp %>%
ggplot(aes(x = mean_1pc, y = ratio_1pc)) +
geom_point() +
scale_y_log_eng(name = 'Ratio (FFA/RFFE)',
limits = c(0.1, 100)) +
scale_x_continuous(name = 'Average of AEP 1% estimates',
trans = 'log10',
labels = scales::comma,
limits = c(1,NA)) +
geom_hline(yintercept = 10^LLA, linetype = 'dashed') + # Upper confidence interval
geom_hline(yintercept = 10^ULA, linetype = 'dashed') + # Lower confidence interval
geom_hline(yintercept = 10^ratio_stats_log["mean_difference_log"], linetype = 'dashed') + # mean
geom_text(data = vic_comp_label,
aes(x = mean_1pc, y = ratio_1pc, label = station_id),
nudge_x = -0.05,
hjust = 'right',
colour = 'blue') +
geom_point(data = vic_comp_label,
aes(x = mean_1pc,
y = ratio_1pc),
colour = 'blue') +
annotate(geom = "text", x = 1, y = 10^ULA + 1, label = sprintf("mean + 2SD = %.2f", 10^ULA), hjust = 'left') +
annotate(geom = "text", x = 1, y = 10^LLA - 0.05, label = sprintf("mean - 2SD = %.2f", 10^LLA), hjust = 'left') +
annotate(geom = "text", x = 1,
y = 10^ratio_stats_log['mean_difference_log'] + 0.2,
label = sprintf("bias = %.2f", 10^ratio_stats_log['mean_difference_log']),
hjust = 'left')
# Make a web version
my_size = 0.1
my_text_size = 2
p <- vic_comp %>%
ggplot(aes(x = mean_1pc, y = ratio_1pc)) +
geom_point(size = my_size) +
scale_y_log_eng(name = 'Ratio (FFA/RFFE)',
limits = c(0.1, 100)) +
scale_x_continuous( name = bquote('Average of AEP 1% estimates (m' ^3 *'s' ^-1 *')'),
trans = 'log10',
labels = scales::comma,
limits = c(1,NA)) +
geom_hline(yintercept = 10^LLA, linetype = 'dashed', size = my_size) + # Upper confidence interval
geom_hline(yintercept = 10^ULA, linetype = 'dashed', size = my_size) + # Lower confidence interval
geom_hline(yintercept = 10^ratio_stats_log["mean_difference_log"], linetype = 'dashed', size = my_size) + # mean
geom_text(data = vic_comp_label,
aes(x = mean_1pc, y = ratio_1pc, label = station_id),
nudge_x = -0.05,
hjust = 'right',
colour = 'blue',
size = my_text_size) +
geom_point(data = vic_comp_label,
aes(x = mean_1pc,
y = ratio_1pc),
colour = 'blue',
size = my_size) +
annotate(geom = "text", x = 1, y = 10^ULA + 1,
label = sprintf("mean + 2SD = %.2f", 10^ULA),
hjust = 'left',
size = my_text_size) +
annotate(geom = "text", x = 1, y = 10^LLA - 0.05,
label = sprintf("mean - 2SD = %.2f", 10^LLA),
hjust = 'left',
size = my_text_size) +
annotate(geom = "text", x = 1,
y = 10^ratio_stats_log['mean_difference_log'] + 0.2,
label = sprintf("bias = %.2f", 10^ratio_stats_log['mean_difference_log']),
hjust = 'left',
size = my_text_size) +
theme_gray(base_size = 5)
p
ggsave(file.path('../figures', 'FFA-RFFE_ratio_vic.png'), p, width = 4, height = 3)
# Investigate outliers
vic_comp_label %>% View
# # A tibble: 5 x 11
# station_id station_name.x area RFFE_1pc_in RFFE_1pc_out diff_log mean_log ratio_1pc ratio_1pc_log
# <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 220002 Stockyard Cre… 75 3483. 411 0.928 3.29 8.47 0.928
# 2 223214 TAMBO RIVER @… 32 7.6 41 -0.732 1.39 0.185 -0.732
# 3 235209 AIRE RIVER @ … 21 2769 110 1.40 3.16 25.2 1.40
# 4 407236 MOUNT HOPE CR… 1629 16600. 616 1.43 3.93 26.9 1.43
# 5 415257 RICHARDSON RI… 1831 319. 1780 -0.747 3.02 0.179 -0.747
#
#
# Output to Excel
clip <- pipe("pbcopy", "w")
write.table(vic_comp_label, file=clip, sep = '\t', row.names = FALSE)
close(clip)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment