Last active
June 11, 2019 00:54
-
-
Save TonyLadson/28132f84030c66855218e278a1cb1c60 to your computer and use it in GitHub Desktop.
RFFE accuracy for Victorian gauges. See blog https://tonyladson.wordpress.com/2019/06/11/the-accuracy-of-the-rffe-in-victoria/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(stringr) | |
library(dplyr) | |
library(readxl) | |
library(readr) | |
library(purrr) | |
# Functions --------------------------------------------------------------- | |
# not in | |
"%ni%" <- function(x, y) x[!x %in% y] | |
# function to plot logarithmic minor breaks | |
# from https://stackoverflow.com/questions/30179442/plotting-minor-breaks-on-a-log-scale-with-ggplot | |
log_breaks = function(maj, radix=10) { | |
function(x) { | |
minx = floor(min(logb(x,radix), na.rm=T)) - 1 | |
maxx = ceiling(max(logb(x,radix), na.rm=T)) + 1 | |
n_major = maxx - minx + 1 | |
major_breaks = seq(minx, maxx, by=1) | |
if (maj) { | |
breaks = major_breaks | |
} else { | |
steps = logb(1:(radix-1),radix) | |
breaks = rep(steps, times=n_major) + | |
rep(major_breaks, each=radix-1) | |
} | |
radix^breaks | |
} | |
} | |
scale_x_log_eng = function(..., radix=10) { | |
scale_x_continuous(..., | |
trans=scales::log_trans(radix), | |
breaks=log_breaks(TRUE, radix), | |
minor_breaks=log_breaks(FALSE, radix)) | |
} | |
scale_y_log_eng = function(..., radix=10) { | |
scale_y_continuous(..., | |
trans=scales::log_trans(radix), | |
breaks=log_breaks(TRUE, radix), | |
minor_breaks=log_breaks(FALSE, radix)) | |
} | |
# Load Victorian data ------------------------------------------------------ | |
# Data are available at | |
# https://drive.google.com/file/d/1UfRW0ua_dU658gUyBIdAtpoOOMtQdH6x/view?usp=sharing | |
fname <- file.path('../data', "vic_stations.csv") | |
vic_gauges <- read_csv(fname, | |
col_types = cols( | |
.default = col_double(), | |
station_name = col_character(), | |
state = col_character(), | |
region.name = col_character() | |
)) | |
glimpse(vic_gauges) | |
# Load RFFE outputs ------------------------------------------------------- | |
# Data are available at | |
#https://drive.google.com/file/d/12gc_4SAEPX8h3AnEaXbRlcj2oKdKpJRA/view?usp=sharing | |
fname = file.path('../data', "RFFE_output_at_stations.csv" ) | |
RFFE_outputs <- read_csv(file = file.path('../data', "RFFE_output_at_stations.csv" )) | |
glimpse(RFFE_outputs) | |
# Compare ----------------------------------------------------------------- | |
# Add means, differences and ratios to data frame | |
vic_comp <- vic_gauges %>% | |
select(station_id, station_name, area, flow_1pc) %>% | |
left_join(RFFE_outputs, by = 'station_id') %>% | |
select(station_id, | |
station_name.x, | |
area, | |
flow_1pc.x, | |
flow_1pc.y) %>% | |
rename(RFFE_1pc_in = flow_1pc.x, | |
RFFE_1pc_out = flow_1pc.y) %>% | |
mutate(diff_log = log10(RFFE_1pc_in) - log10(RFFE_1pc_out)) %>% | |
mutate(mean_log = log10(0.5*(RFFE_1pc_in + RFFE_1pc_out ))) %>% | |
mutate(ratio_1pc = RFFE_1pc_in/RFFE_1pc_out) %>% | |
mutate(ratio_1pc_log = log10(RFFE_1pc_in/RFFE_1pc_out)) %>% | |
mutate(diff = RFFE_1pc_in - RFFE_1pc_out) %>% | |
mutate(mean_1pc = 0.5*(RFFE_1pc_in + RFFE_1pc_out )) | |
# Scatter plot | |
vic_comp %>% | |
ggplot(aes(x = RFFE_1pc_out ,y = RFFE_1pc_in )) + | |
geom_point() + | |
scale_x_log_eng(name = bquote('1% AEP from RFFE input data (m' ^3 *'s' ^-1 *')'), | |
labels = scales::comma, | |
limits = c(1,20000))+ | |
scale_y_log_eng(name = bquote('1% AEP estimate from RFFE ouput (m' ^3 *'s' ^-1 *')'), | |
labels = scales::comma, | |
limits = c(1,20000)) + | |
geom_abline(slope = 1, intercept = 0, linetype = 'dashed') + | |
geom_smooth(se = FALSE) | |
# Web version | |
my_size = 0.1 | |
my_text_size = 2 | |
p <- vic_comp %>% | |
ggplot(aes(x = RFFE_1pc_out ,y = RFFE_1pc_in )) + | |
geom_point(size = my_size) + | |
scale_x_log_eng(name = bquote('1% AEP from RFFE input data (m' ^3 *'s' ^-1 *')'), | |
labels = scales::comma, | |
limits = c(1,20000))+ | |
scale_y_log_eng(name = bquote('1% AEP estimate from RFFE ouput (m' ^3 *'s' ^-1 *')'), | |
labels = scales::comma, | |
limits = c(1,20000)) + | |
geom_abline(slope = 1, intercept = 0, linetype = 'dashed', size = my_size) + | |
geom_smooth(se = FALSE, size = my_size) + | |
theme_gray(base_size = 5) | |
p | |
ggsave(file.path('../figures', 'scatterplot_vic.png'), p, width = 4, height = 3) | |
ratio_stats_log <- | |
vic_comp %>% | |
#filter(! station_id %in% c(210084, 212040)) %>% | |
summarise(mean_difference_log = mean(ratio_1pc_log), | |
sd_diff_log = sd(ratio_1pc_log)) %>% | |
unlist() | |
ratio_stats_log | |
# mean_difference_log sd_diff_log | |
# 0.04094854 0.34756643 | |
10^ratio_stats_log["mean_difference_log"] | |
# mean_difference_log | |
# 1.098876 | |
ULA <- as.vector(ratio_stats_log["mean_difference_log"] + 2* ratio_stats_log["sd_diff_log"]) # Upper Limit | |
ULA | |
# [1] 0.7360814 | |
10^ULA | |
#[1] 5.446047 | |
LLA <- as.vector(ratio_stats_log["mean_difference_log"] - 2* ratio_stats_log["sd_diff_log"]) # Upper Limit | |
LLA | |
#[1] -0.6541843 | |
10^LLA | |
# [1] 0.2217255 | |
# make a data frame for labels | |
vic_comp_label<- | |
vic_comp %>% | |
filter(ratio_1pc > 7 | ratio_1pc <0.22) | |
vic_comp %>% | |
ggplot(aes(x = mean_1pc, y = ratio_1pc)) + | |
geom_point() + | |
scale_y_log_eng(name = 'Ratio (FFA/RFFE)', | |
limits = c(0.1, 100)) + | |
scale_x_continuous(name = 'Average of AEP 1% estimates', | |
trans = 'log10', | |
labels = scales::comma, | |
limits = c(1,NA)) + | |
geom_hline(yintercept = 10^LLA, linetype = 'dashed') + # Upper confidence interval | |
geom_hline(yintercept = 10^ULA, linetype = 'dashed') + # Lower confidence interval | |
geom_hline(yintercept = 10^ratio_stats_log["mean_difference_log"], linetype = 'dashed') + # mean | |
geom_text(data = vic_comp_label, | |
aes(x = mean_1pc, y = ratio_1pc, label = station_id), | |
nudge_x = -0.05, | |
hjust = 'right', | |
colour = 'blue') + | |
geom_point(data = vic_comp_label, | |
aes(x = mean_1pc, | |
y = ratio_1pc), | |
colour = 'blue') + | |
annotate(geom = "text", x = 1, y = 10^ULA + 1, label = sprintf("mean + 2SD = %.2f", 10^ULA), hjust = 'left') + | |
annotate(geom = "text", x = 1, y = 10^LLA - 0.05, label = sprintf("mean - 2SD = %.2f", 10^LLA), hjust = 'left') + | |
annotate(geom = "text", x = 1, | |
y = 10^ratio_stats_log['mean_difference_log'] + 0.2, | |
label = sprintf("bias = %.2f", 10^ratio_stats_log['mean_difference_log']), | |
hjust = 'left') | |
# Make a web version | |
my_size = 0.1 | |
my_text_size = 2 | |
p <- vic_comp %>% | |
ggplot(aes(x = mean_1pc, y = ratio_1pc)) + | |
geom_point(size = my_size) + | |
scale_y_log_eng(name = 'Ratio (FFA/RFFE)', | |
limits = c(0.1, 100)) + | |
scale_x_continuous( name = bquote('Average of AEP 1% estimates (m' ^3 *'s' ^-1 *')'), | |
trans = 'log10', | |
labels = scales::comma, | |
limits = c(1,NA)) + | |
geom_hline(yintercept = 10^LLA, linetype = 'dashed', size = my_size) + # Upper confidence interval | |
geom_hline(yintercept = 10^ULA, linetype = 'dashed', size = my_size) + # Lower confidence interval | |
geom_hline(yintercept = 10^ratio_stats_log["mean_difference_log"], linetype = 'dashed', size = my_size) + # mean | |
geom_text(data = vic_comp_label, | |
aes(x = mean_1pc, y = ratio_1pc, label = station_id), | |
nudge_x = -0.05, | |
hjust = 'right', | |
colour = 'blue', | |
size = my_text_size) + | |
geom_point(data = vic_comp_label, | |
aes(x = mean_1pc, | |
y = ratio_1pc), | |
colour = 'blue', | |
size = my_size) + | |
annotate(geom = "text", x = 1, y = 10^ULA + 1, | |
label = sprintf("mean + 2SD = %.2f", 10^ULA), | |
hjust = 'left', | |
size = my_text_size) + | |
annotate(geom = "text", x = 1, y = 10^LLA - 0.05, | |
label = sprintf("mean - 2SD = %.2f", 10^LLA), | |
hjust = 'left', | |
size = my_text_size) + | |
annotate(geom = "text", x = 1, | |
y = 10^ratio_stats_log['mean_difference_log'] + 0.2, | |
label = sprintf("bias = %.2f", 10^ratio_stats_log['mean_difference_log']), | |
hjust = 'left', | |
size = my_text_size) + | |
theme_gray(base_size = 5) | |
p | |
ggsave(file.path('../figures', 'FFA-RFFE_ratio_vic.png'), p, width = 4, height = 3) | |
# Investigate outliers | |
vic_comp_label %>% View | |
# # A tibble: 5 x 11 | |
# station_id station_name.x area RFFE_1pc_in RFFE_1pc_out diff_log mean_log ratio_1pc ratio_1pc_log | |
# <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> | |
# 1 220002 Stockyard Cre… 75 3483. 411 0.928 3.29 8.47 0.928 | |
# 2 223214 TAMBO RIVER @… 32 7.6 41 -0.732 1.39 0.185 -0.732 | |
# 3 235209 AIRE RIVER @ … 21 2769 110 1.40 3.16 25.2 1.40 | |
# 4 407236 MOUNT HOPE CR… 1629 16600. 616 1.43 3.93 26.9 1.43 | |
# 5 415257 RICHARDSON RI… 1831 319. 1780 -0.747 3.02 0.179 -0.747 | |
# | |
# | |
# Output to Excel | |
clip <- pipe("pbcopy", "w") | |
write.table(vic_comp_label, file=clip, sep = '\t', row.names = FALSE) | |
close(clip) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment