Created
May 8, 2018 14:38
-
-
Save pyroman1337/60883f60343f639558bdbff1a9483725 to your computer and use it in GitHub Desktop.
This is the script for "Restricting the nonlinearity parameter in a commonly used soil greenhouse gas flux calculation scheme for more reliable flux estimates" published on PLOSone
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################### | |
###### PAPER # 3 - Gas flux analysis ######### | |
############################################### | |
# library(devtools) | |
# install_bitbucket("ecoRoland/gasfluxes") | |
library(gasfluxes) | |
library(data.table) | |
library(ggplot2) | |
library(plot3D) | |
library(plotly) | |
# simulateflux <- function(t, V, A, c0 = 325, fmax = 5, nflux = 1, kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32))), # fmax = 100/36, kappa was multiplied by 3600 | |
simulateflux <- function(t, V, A, c0 = 325, fmax = 5, nflux = 25, kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32))), # fmax = 100/36, kappa was multiplied by 3600 | |
nMC , GC.sd, g.factor = 4, f.detect, | |
seed) { | |
if (!missing(seed)) set.seed(seed) | |
## model input data ## | |
h <- V/A | |
# f0 <- signif(seq(0,fmax,length.out = nflux),3) # all fluxes to be simulated from 0 to fmax; | |
# f0 <- signif(exp(seq(0,log(fmax+1),length.out = nflux)),5)-1 # all fluxes to be simulated from 0 to fmax; | |
f0 <- signif(c(0,exp(seq(-5,log(fmax),length.out=nflux-1))),3) # exponential f0 scale to have more data on small fluxes | |
# if (missing(f.detect)) { | |
# if (length(t) == 4L) { | |
# | |
# # f.detect <- 13.20 * t[4]^(-0.9973) * c0 * GC.sd/c0 * A | |
# # f.detect <- 0.036 # calculated with GC.sd = 5 | |
# f.detect <- 0.0808 # 0.0808 nmol N2O / m2 / s for GD.sd = 3 (always HMR) | |
# f.detect <- 0.08000 # nmol N2O / m2 / s for GD.sd = 3 equilibrium | |
# | |
# } else { | |
#simulate f.detect | |
f.detect <- 0 # starting value for f.detect | |
C00 <- c0 * 273.15 / 22.4 / (273.15 + 15) * 1000 #nmol N2O / m^3 | |
sdGC <- GC.sd * 273.15 / 22.4 / (273.15 + 15) * 1000 #from ppb GC.SD to nmol N / m^3 | |
# t = c(seq(0,01,length.out=4))*60 # c(0,1900,3000,3600) | |
for (i in 1:2) { | |
set.seed(24) | |
# sim <- data.frame(t = seq(0, 1, length.out = length(t)), C = rnorm(4e3, mean = C00, sd = sdGC), | |
# id = rep(1:1e3, each = length(t)), A = A, V = V) | |
sim <- data.frame(t = t, C = rnorm(4e3, mean = C00, sd = sdGC), | |
id = rep(1:1e3, each = length(t)), A = A, V = V) | |
simflux <- gasfluxes(sim, .id = "id", .times = "t", methods = c("HMR", "linear"), k_HMR = log(1.5/3600), plot = FALSE) | |
simflux[, f0 := linear.f0] # use linear estimate as default for this method | |
# simflux[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec mean(t) | |
# simflux[, kappa.max := abs(linear.f0/f.detect/max(t))] | |
# simflux[, kappa.max := linear.f0/f.detect / (t.inf - sum(t)) ] # tfactor (t.inf - sum(t))/3600 | |
simflux[ !is.na(HMR.f0), f0 := HMR.f0] # | |
n.HMR <- simflux[ !is.na(HMR.f0), .N] # | |
f.detect <- simflux[, quantile(f0, 0.975)] # 0.0808 nmol N2O / m2 / s for GD.sd = 3 (always HMR) | |
write.table(t(c(as.numeric(f.detect),GC.sd,n.HMR)),file="fdetect-out.csv",append=T,col.names = F, sep=";") | |
} | |
# selectfluxes(res, "RH2017", f.detect = f.detect) | |
# res[method == "HMR", .N] #9 | |
# | |
# stop("Detection limit can only be estimated for flux measurements with three or four time points. Please specify the f.detect parameter.") | |
# } | |
# } | |
# Ct.test <- (c0 - fmax/(-tail(kappa, 1) * h)) + fmax * exp(-tail(kappa, 1)*t)/(-tail(kappa, 1)*h) # in c0 in ppb | |
Ct.test <- (C00 - fmax/(-tail(kappa, 1) * h)) + fmax * exp(-tail(kappa, 1)*t)/(-tail(kappa, 1)*h) # in c0 in ppb | |
# neftel.rsq <- signif(summary(lm(Ct.test ~t))$r.squared,5) # calculation of R.lim (minimal R.squared for a given system) | |
# prepare input for flux calculation for all simulated fluxes | |
df <- data.table(V=V, A=A, time=t) | |
df <- df[rep(seq_len(nrow(df)), nMC)] | |
df[, mc := rep(seq_len(nMC), each = length(t))] | |
df <- df[rep(seq_len(nrow(df)), length(f0) * length(kappa))] | |
combs <- CJ(f0 = f0, kappa = kappa) | |
df[, c("f0", "kappa") := combs[rep(seq_len(nrow(combs)), each = length(t) * nMC)]] | |
df[, phi := c0 - f0/(-kappa*h)] | |
df[, C := phi + f0 * exp(-kappa*t)/(-kappa*h) + rnorm(.N, sd = GC.sd)] | |
#flux calculation | |
message("Calculating ", nrow(df) / length(t), " fluxes. Please be patient... jaah ganz easy!") | |
res <- gasfluxes(df, c("mc", "f0","kappa", "phi"),methods = c("linear", "robust linear", "HMR"), gfactor = g.factor, plot = FALSE, | |
verbose = FALSE,k_HMR= log(1.5/3600)) | |
# res[, rsq := df[, cor(C, time)^2, by = c("mc", "kappa", "phi")][["V1"]]] #have gasfluxes return rsq instead | |
############################### | |
### decision making section ### | |
############################### | |
# RF2011new | |
res <- selectfluxes(res, select = "RF2011new") | |
setnames(res,"flux","RF2011")#[] | |
# ## dynamic R.squared method, dyn.rsq ## | |
# f.detect.rh <- res[f0 == 0, quantile(linear.f0, 0.95)] # detection limit is 95 % quantile of all linear zero fluxes, correct?????? | |
# res[, rsq.lim := ((3*f.detect.rh)^2 + neftel.rsq*linear.f0^2) / ((3*f.detect.rh)^2 + linear.f0^2)] # formula that calculates the R² threshold allowing for nonlinear fluxes; reformulated dyn.rsq definition by Roland | |
# res[, range(rsq.lim)] | |
# res[, dyn.rsq := linear.f0] | |
# res[rsq > rsq.lim & !is.na(HMR.f0), dyn.rsq := HMR.f0] # use HMR estimate if HMR was fitted and method requirements are met | |
# | |
### implemented in the selectflux function ### | |
# res <- selectfluxes(res, "RH2017", f.detect = f.detect, beta = 3, R.lim = neftel.rsq) | |
# res[, dyn.Rsq := flux] | |
# res <- selectfluxes(res, "RH2017", f.detect = f.detect, | |
# dyn.Rsquared = function(f.lin, R.lim, f.detect) ((30 * f.detect)^2 + R.lim * f.lin^2)/((30 * f.detect)^2 + f.lin^2) | |
# ) | |
# ## minimum detectable flux scheme, f.det using f.detect ## | |
# res[, f.det := linear.f0] # use linear estimate as default for this method | |
# res[linear.f0 > 2*f.detect & !is.na(HMR.f0), f.det := HMR.f0] | |
# | |
## g-factor scheme, g.fact ## | |
res[, g.fact := robust.linear.f0] # use robust linear estimate as default for this method | |
res[abs(HMR.f0/linear.f0)<g.factor & !is.na(HMR.f0), g.fact := HMR.f0] #note the abs!!! | |
res[is.na(g.fact), g.fact := linear.f0] # use linear if robust and HMR dont work | |
## p-value scheme, p.value ## | |
# res[, p.value := linear.f0] # use linear estimate as default for this method | |
# res[ HMR.f0.p < linear.f0.p & !is.na(HMR.f0), p.value := HMR.f0] #note the abs!!! | |
# | |
## fix Rsq ## | |
# res[, Rsq.fix := linear.f0] # use linear estimate as default for this method | |
# res[ 0.8 < (linear.r)^2 & !is.na(HMR.f0), Rsq.fix := HMR.f0] #note the abs!!! | |
## fix kappa ## | |
# res[, kappa.fix := linear.f0] # use linear estimate as default for this method | |
# res[ 5/3600 > HMR.kappa & !is.na(HMR.f0), kappa.fix := HMR.f0] #note the abs!!! | |
# | |
## dynamic kappa scheme, kappa.max ## | |
res[, kappa.max := robust.linear.f0] # use robust linear estimate as default for this method | |
res[, kappa.Max := linear.f0/f.detect/max(t)] | |
res[ HMR.kappa < kappa.Max & !is.na(HMR.f0), kappa.max := HMR.f0] # | |
res[is.na(kappa.max), kappa.max := linear.f0] # use linear if robust and HMR dont work | |
# ## dynamic kappa scheme, kappa.max + AIC ## | |
# res[, kappa.AIC := linear.f0] # use linear estimate as default for this method | |
# res[, kappa.AIC := linear.f0/f.detect/max(t)] | |
# res[ HMR.kappa < kappa.AIC & !is.na(HMR.f0), kappa.AIC := HMR.f0] # | |
## dynamic kappa scheme PLUS, dyn.kpp ## | |
# res[, dyn.kpp := linear.f0] # use linear estimate as default for this method res[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec mean(t) | |
# res[, t.inf := (HMR.phi-linear.C0)* h / linear.f0] # sec corrected version | |
# # res[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec by h instead of * h | |
# res[, kpp.max := linear.f0 / f.detect / (t.inf- max(t)) ] # ## tfactor (t.inf - sum(t))/3600 | |
# # res[, kpp.max := 1 / (t.inf) ] # tfactor (t.inf - sum(t))/3600 | |
# # res[, kpp.max := abs(linear.f0/f.detect) / (var(t) / max(t)) ] # variance into t.factor introduced | |
# res[ HMR.kappa < kpp.max & !is.na(HMR.f0), dyn.kpp := HMR.f0] # | |
## Rolands new scheme, fuss.2017 ## | |
res[, RF2017 := robust.linear.f0] # use linear estimate as default for this method | |
res[HMR.kappa < (20/3600) & !is.na(HMR.f0), RF2017 := HMR.f0] # Rolands new criteria explained by email, kappa smaller 5 | |
res[is.na(RF2017), RF2017 := linear.f0] # use linear if robust and HMR dont work | |
## Thus I used HMR if it could be fit and kappa was below 20 h^-1 (in practice this resulted in kappa < 5 for all fluxes accepted as non-linear). ## | |
## AIC scheme ## | |
res[, AIC := robust.linear.f0] # use linear estimate as default for this method | |
res[ HMR.AIC < linear.AIC & !is.na(HMR.f0), AIC := HMR.f0] # & HMR.kappa < 20/3600 ; Rolands new criteria explained by email, kappa smaller 5 | |
res[is.na(AIC), AIC := linear.f0] # use linear if robust and HMR dont work | |
## HMR-linear combination scheme, HMR.f0 ## | |
res[is.na(HMR.f0), HMR.f0 := linear.f0] # Use linear for HMR if no HMR could be fitted | |
res[is.na(robust.linear.f0), robust.linear.f0 := linear.f0] | |
class(res) <- c("gasfluxes.simu", class(res)) | |
res[] #return value | |
} | |
############################### | |
### start actual simulation ### | |
# simus <- simulateflux(t = c(seq(0,36,length.out=4))/60, V = 0.0085, A = 0.0707, nMC = 10, seed = 42) | |
# system.time(simus <- simulateflux(t = c(seq(0,60,length.out=4))*60, V = 0.015, A = 0.07, GC.sd = 3, nMC = 25, seed = 88)) # fibl system, takes 3.5 min for nMC = 40 | |
system.time(simus <- simulateflux(t = c(seq(0,36,length.out=4))*60, V = 0.014, A = 0.07, GC.sd = 3, nMC = 100, seed = 12)) # Nora system, takes 3.5 min for nMC = 40 | |
names(simus)[30] <- "dyn.Rsq" | |
# simus.55.backup <-simus.45.backup | |
# simus.40.backup <- simus | |
# simu.rf2011 <- selectfluxes(simus, "RF2011new") | |
# simus$g.fact <- simu.rf2011$flux | |
# dyn.Rsquared = function(f.lin, R.lim, f.detect) ((30 * f.detect)^2 + R.lim * f.lin^2)/((30 * f.detect)^2 + f.lin^2) | |
# simu.dynRsqpls <- selectfluxes(simus, "RH2017", f.detect = f.detect) | |
#relatively slow: | |
# system.time(simus <- simulateflux(t = c(seq(0,36,12))/60, V = 0.0085, A = 0.0707, nMC = 100, seed = 42)) | |
#on my (fast) system: 400 seconds | |
# simus[HMR.f0 > 3.3, HMR.f0 := 3.3] # ??? what was that good for? ... limit HMR.f0 estimate on 3.3?? | |
summary.gasfluxes.simu <- function(x) { | |
x <- melt(x, id.vars = c("mc", "f0", "kappa", "phi"), | |
# measure.vars = c("linear.f0","robust.linear.f0","HMR.f0","g.fact","kappa.max","AIC"), | |
measure.vars = c("linear.f0","robust.linear.f0","HMR.f0","AIC","g.fact","kappa.max","RF2011","RF2017"), # | |
# measure.vars = c("linear.f0","kappa.max","HMR.f0"), | |
variable.name = "method", value.name = "flux.estimate") | |
x <- x[, .(median = median(flux.estimate), | |
mean = mean(flux.estimate), | |
sd = sd(flux.estimate,na.rm = T), | |
lwr = quantile(flux.estimate, 0.05,na.rm=T), | |
upr = quantile(flux.estimate, 0.95,na.rm=T), | |
var = var(flux.estimate,na.rm = T)), by = c("f0", "kappa", "phi", "method")] | |
x[, IQR := upr - lwr] | |
x[, bias:= mean - f0] | |
x[, MSE := bias^2 + var] | |
class(x) <- c("gasfluxes.simu.summary", class(x)) | |
x[] | |
} | |
contour.fix <- function(df) { | |
colname <- names(df) | |
names(df) <- c("x", "y", "z") | |
Range <- as.data.frame(sapply(df, range)) | |
Range[1,1] <- 1E-6 | |
Dim <- as.data.frame(t(sapply(df, function(x) length(unique(x))))) | |
arb_z = Range$z[1] - diff(Range$z)/Dim$x | |
df2 <- rbind(df, | |
expand.grid(x = c(exp(log(Range$x[1]) - diff(log(Range$x))/Dim$x), exp(log(Range$x[2]) + diff(log(Range$x))/Dim$x)), | |
y = unique(df$y), z = arb_z), # seq(Range$y[1], Range$y[2], length = Dim$y) | |
expand.grid(x = unique(df$x), # seq(Range$x[1], Range$x[2], length = Dim$x) | |
y = c(exp(log(Range$y[1]) - diff(log(Range$y))/Dim$y), exp(log(Range$y[2]) + diff(log(Range$y))/Dim$y)), z = arb_z)) | |
# g <- ggplot(df2, aes(x, y, z = z)) + labs(x = colname[1], y = colname[2], fill = colname[3]) + | |
# stat_contour(geom="polygon", aes(fill=..level..),bins =50) + | |
# coord_cartesian(xlim=c(Range$x), ylim=c(Range$y), expand = F) + | |
# scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) + | |
# theme_bw() | |
names(df2) <- c("f0", "kappa", "stat.x") | |
return(df2) | |
} | |
simus.sum <- summary(simus) | |
# simus.sumus <- summary.gasfluxes.simu(simus) | |
# head(simus.sum) | |
# head(simus.sumus) | |
# median(simus$dyn.Rsq) | |
# simus.sum[IQR > 0.5, IQR := 0.5] # Use linear for HMR if no HMR could be fitted | |
# simus.sum[, IQR := log(IQR)] # use logarithmic IQR values | |
# simus.sum[, var := log(var)] # use logarithmic variance values | |
# simus.sum[, MSE := log(MSE)] # use logarithmic MSE values | |
plot.gasfluxes.simu.mean <- function(x, type = "contour", stat = "median") { | |
if (type == "contour") { | |
simus.plot <- simus.sum[,contour.fix(.(f0,kappa,median)),method] | |
p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) + | |
stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) + | |
facet_wrap(~ method) + | |
scale_fill_distiller(name = sprintf("flux estimate\n(%s)", stat), palette = "BrBG") + # BRBG "YlGnBu" RdYlGn" | |
scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) + | |
coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) + | |
theme_bw() | |
return(print(p)) | |
} | |
} | |
plot.gasfluxes.simu.mean(simus.sum ) | |
plot.gasfluxes.simu.iqr <- function(x, type = "IQR.plot", stat = "IQ90") { | |
if (type == "IQR.plot") { | |
simus.plot <- simus.sum[,contour.fix(.(f0,kappa,log(IQR))),method] | |
p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) + | |
stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) + | |
facet_wrap(~ method) + | |
scale_fill_distiller(name = sprintf("estimated\n log(%s)", stat), palette = "RdYlGn") + # BrBG "YlGnBu" RdYlGn" | |
scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) + | |
coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) + | |
theme_bw() | |
# annotate("text", x = 2, y=1E-5, label = c("blablab")) | |
return(print(p)) | |
} | |
} #more plot types can be implemented | |
plot.gasfluxes.simu.iqr(simus.sum) | |
plot.gasfluxes.simu.var <- function(x, type = "var.plot", stat = "var") { | |
if (type == "var.plot") { | |
simus.plot <- simus.sum[,contour.fix(.(f0,kappa,log(var))),method] | |
p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) + | |
stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) + | |
facet_wrap(~ method) + | |
scale_fill_distiller(name = sprintf("estimated\n log(%s)", stat), palette = "RdYlGn") + # BrBG "YlGnBu" RdYlGn" | |
scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) + | |
coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) + | |
theme_bw() | |
# annotate("text", x = 2, y=1E-5, label = c("blablab")) | |
return(print(p)) | |
} | |
} #more plot types can be implemented | |
plot.gasfluxes.simu.var(simus.sum) | |
plot.gasfluxes.simu.var <- function(x, type = "var.plot", stat = "MSE") { | |
if (type == "var.plot") { | |
simus.plot <- simus.sum[,contour.fix(.(f0,kappa,log(MSE))),method] | |
p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) + | |
stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) + | |
facet_wrap(~ method) + | |
scale_fill_distiller(name = sprintf("estimated\n log(%s)", stat), palette = "RdYlGn") + # BrBG "YlGnBu" RdYlGn" | |
scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) + | |
coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) + | |
theme_bw() | |
# annotate("text", x = 2, y=1E-5, label = c("blablab")) | |
return(print(p)) | |
} | |
} #more plot types can be implemented | |
plot.gasfluxes.simu.var(simus.sum) | |
simus.sum[,mean(exp(IQR)),method] # average IQR for each method | |
simus.sum[f0 == 0, .(quantile(mean,0.975)), method] # detection limit for each method | |
### ---- | |
# plot(nora.dataset$flux,log(nora.dataset$kappa), xlim = c(0,2.78), ylim = c(1E-5,1E-2),col="black") | |
# plot.gasfluxes.hist(simus.sum) | |
# simus.sum[simus.sum$method=="dyn.Rsq" & simus.sum$f0 == 0.333 ] | |
simus.sum.backup <- simus.sum | |
# | |
# simus.sum.tmax13 <- simus.sum | |
# | |
# | |
# simus.60 <- simus.sum.tmax60[method=="kappa.max"] | |
# simus.60[,method:="t.max = 60 min"] | |
# simus.45 <- simus.sum.tmax45[method=="kappa.max"] | |
# simus.45[,method:="t.max = 45 min"] | |
# simus.30 <- simus.sum.tmax30[method=="kappa.max"] | |
# simus.30[,method:="t.max = 30 min"] | |
# simus.12 <- simus.sum.tmax12[method=="kappa.max"] | |
# simus.12[,method:="t.max = 12 min"] | |
# simus.75 <- simus.sum.tmax75[method=="kappa.max"] | |
# simus.75[,method:="t.max = 75 min"] | |
# simus.90 <- simus.sum.tmax90[method=="kappa.max"] | |
# simus.90[,method:="t.max = 90 min"] | |
# simus.GCsd <- rbind(simus.15,simus.09,simus.06,simus.03,simus.01) | |
# simus.GCsd <- rbind(simus.13,simus.30,simus.45,simus.60,simus.75,simus.90) | |
# simus.sum <- simus.GCsd | |
# plot.gasfluxes.simu.iqr(simus.sum) | |
########################################### | |
#### density frequence comparision ######## | |
########################################### | |
# load a GHG flux dataset... | |
# lysi.gfg <- read.table("gfg_Lysi.csv",sep=",",header=T) | |
## add fibl data | |
# fibl.raw <- read.table("/home/roman/PhD/data/GasFluxGames/Fibl/N2O_Rohdaten.csv",header=T, sep = ",") | |
# fibl.raw <- read.table("/home/hueppir/nashome/Gasflux-Simulations/GasFluxGames/Fibl/N2O_Rohdaten.csv",header=T, sep = ",") | |
fibl.raw <- fread("/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/N2O_Rohdaten.csv",header=T, sep = ",") | |
fibl.raw$time <- fibl.raw$time * 3600 # we use seconds instead of hours | |
# art.raw <- fread("/home/hueppir/PhD/data/Reckenholz2014/fluxes-n2o/n2o-input-complete.csv",header=T,sep = ",") | |
# art.raw$V <- art.raw$V / 1000 | |
# fibl.raw <- art.raw | |
fibl.raw$C <- fibl.raw$C * 1E3 / 22.4136 * (273.15/(15 + 273.15)) # n mol N2O/m^3 ;22.4L/mol;273K, | |
# flux.ghg <- gasfluxes(fibl.raw, methods = c("linear", "robust linear", "HMR"), k_HMR = log(1.5/3600),plot = T) # fluxes in nmol-N2O/m²/s | |
flux.ghg <- gasfluxes(na.omit(fibl.raw), | |
methods = c("linear", "robust linear", "HMR"), | |
.id = c("ID", "V", "A"), | |
.V = "V", .A = "A", .times = "time", .C = "C", k_HMR = log(1.5/3600),plot = F) # fluxes in nmol-N2O/m²/s | |
# plot_ly(flux.ghg,x = flux.ghg$linear.C0, y = flux.ghg$HMR.phi) | |
# plot_ly(data=flux.ghg, x = flux.ghg$HMR.kappa, y = flux.ghg$t.phi ) | |
# write.table(flux.ghg, "/home/roman/PhD/data/GasFluxGames/Fibl/fibl-n2o-fluxes.csv", sep = ";", row.names = F) | |
write.table(flux.ghg, "/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-fluxes_detailed.csv", sep = ";", row.names = F) | |
# write.table(flux.ghg, "/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Reckenholz/art-n2o-fluxes_detailed.csv", sep = ";", row.names = F) | |
flux.ghg.backup <- flux.ghg | |
flux.ghg <- fread("/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-fluxes_detailed.csv") | |
# always HMR if fitted | |
flux.ghg[is.na(HMR.f0), HMR.f0 := linear.f0] # Use linear for HMR if no HMR could be fitted | |
flux.ghg[is.na(HMR.f0), HMR.f0 := robust.linear.f0] | |
flux.ghg[is.na(robust.linear.f0) & is.na(HMR.f0.se),HMR.f0 := linear.f0] | |
flux.ghg[HMR.f0 > 5000, HMR.f0 := linear.f0] ## avoid the very large HMR fluxes | |
### kappa.max method | |
f.detect <- 0.069 # from calculation 0.028 | |
t <- c(seq(0,36,length.out=4))*60 | |
flux.ghg[, kappa.max := robust.linear.f0] # use linear estimate as default for this method | |
flux.ghg[is.na(kappa.max), kappa.max := linear.f0] # use linear estimate as default for this method | |
flux.ghg[is.na(robust.linear.f0) & is.na(HMR.f0.se),kappa.max := linear.f0] | |
flux.ghg[, kappa.dyn := abs(linear.f0/f.detect/max(t))] | |
flux.ghg[ HMR.kappa < kappa.dyn & !is.na(HMR.f0.se), kappa.max := HMR.f0] # | |
# flux.ghg[HMR.kappa < kappa.max & !is.na(HMR.f0.se), .N] | |
# flux.ghg[,mean(dyn.kappa,na.rm = T)] | |
### AIC method | |
flux.ghg[, AIC := robust.linear.f0] # use linear estimate as default for this method | |
flux.ghg[ HMR.AIC < linear.AIC & !is.na(HMR.f0.se), AIC := HMR.f0] # | |
flux.ghg[is.na(robust.linear.f0.se) & is.na(HMR.f0.se),AIC := linear.f0] | |
# flux.ghg[ HMR.AIC < linear.AIC & !is.na(HMR.f0.se), .N] | |
# flux.ghg[,mean(AIC,na.rm = T)] | |
### g.fact | |
flux.ghg[, g.fact := robust.linear.f0] # use linear estimate as default for this method | |
flux.ghg[HMR.f0/robust.linear.f0 < 4 & !is.na(HMR.f0.se), g.fact := HMR.f0] # Rolands new criteria explained by email, kappa smaller 5 | |
flux.ghg[is.na(g.fact), g.fact := linear.f0] # Use linear for HMR if no HMR could be fitted | |
# flux.ghg[HMR.f0/robust.linear.f0 < 4 & !is.na(HMR.f0.se) , .N] | |
# flux.ghg[,mean(g.fact,na.rm = T)] | |
## Rolands new scheme, fuss.2017 ## | |
max.kp <- 20 | |
flux.ghg[, RF2017 := robust.linear.f0] # use linear estimate as default for this method | |
flux.ghg[HMR.kappa < (max.kp/3600) & !is.na(HMR.f0.se), RF2017 := HMR.f0] # Rolands new criteria explained by email, kappa smaller 5 | |
flux.ghg[is.na(RF2017), RF2017 := linear.f0] # Use linear for HMR if no HMR could be fitted | |
# flux.ghg[HMR.kappa < max.kp/3600 & !is.na(HMR.f0.se), .N] | |
# flux.ghg[,mean(fuss.2017,na.rm = T)] | |
### RF2011new; bad p-value criteria | |
selectfluxes(flux.ghg, select = "RF2011new") | |
setnames(flux.ghg,"flux","RF2011")[] | |
### Robust linear use linear if only 3 data points | |
flux.ghg[is.na(robust.linear.f0), robust.linear.f0 := linear.f0] # Use linear for HMR if no HMR could be fitted | |
# flux.ghg$robust.linear.f0[is.na(flux.ghg$robust.linear.f0)] <- flux.ghg$linear.f0[is.na(flux.ghg$robust.linear.f0)] | |
# flux.ghg[method == "HMR", .N] | |
# flux.ghg[,mean(flux,na.rm = T)] | |
# method.tab$mean[3] <- flux.ghg[,mean(HMR.f0,na.rm = T)] | |
# flux.ghg[!is.na(HMR.AIC), .N] | |
# fibl.melt <- melt(flux.ghg, id.vars = c("ID"), | |
# measure.vars = c("linear.f0", "HMR.f0","robust.linear.f0"), | |
# variable.name = "method", value.name = "flux.estimate") | |
# fibl.results <- fibl.melt[,.(mean.flux=mean(flux.estimate,na.rm = T)),method] | |
# flux.ghg <- flux.ghg.backup | |
# flux.ghg <- fread("/home/hueppir/nashome/Gasflux-Simulations/GasFluxGames/Reckenholz/art-n2o-fluxes_detailed.csv") | |
# selectfluxes(flux.ghg, select = "RH2017", R.lim = 0.6, f.detect = 0.067) | |
# flux.ghg <- selectfluxes(flux.ghg, select = "RH2017", R.lim = 0.352, f.detect = 0.39) | |
# method.tab <- data.table(methods = c("linear","robust linear","HMR","AIC","g-factor","kappa.max","RF2011","RF2017")) | |
# matrix(NA, nrow = 8, ncol = 6) | |
# row.names(method.tab) <- c("linear","robust linear","HMR","AIC","g-factor","kappa.max","RF2011","RF2017") | |
# names(method.tab) <- c("mean","n HMR","n linear","nonlinear/linear","fdet","MSE") | |
# method.tab$mean <- NA | |
# method.tab$mean[1] <- flux.ghg[,mean(linear.f0)] | |
# method.tab$mean[2] <- flux.ghg[,mean(robust.linear.f0,na.rm=T)] | |
# flux.ghg[, .(mean = mean)] | |
# flux.ghg[method == "HMR", .N] | |
# flux.ghg[,mean(flux,na.rm = T)] | |
# HMR method, robust.linear alternatively | |
# method.tab$nonlinear.linear <- NA | |
# method.tab$bias <- NA | |
# method.tab$IQ90 <- NA | |
# method.tab$MSE <- NA | |
# flux.ghg[is.na(robust.linear.f0),.N] | |
# flux.ghg[is.na(robust.linear.f0),robust.linear.f0 := linear.f0] | |
# method.tab$bias <- flux.ghg[,mean(robust.linear.f0,na.rm = T)] | |
# h <- 0.2 | |
# f.detect <- 0.04 | |
# ### kpp.max (dyn.kappa) method that is more restrictive and uses t.meas equal to t.phi-t.meas | |
# flux.ghg[, dyn.kappa.plus := robust.linear.f0] # use linear estimate as default for this method | |
# flux.ghg[is.na(dyn.kappa.plus), dyn.kappa.plus := linear.f0] # use linear estimate as default for this method | |
# flux.ghg[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec mean(t) | |
# flux.ghg[, kpp.max := linear.f0/f.detect / (t.inf - sum(t)) ] # tfactor (t.inf - sum(t))/3600 | |
# # res[, kpp.max := abs(linear.f0/f.detect) / (var(t) / max(t)) ] # variance into t.factor introduced | |
# flux.ghg[ HMR.kappa < kpp.max & !is.na(HMR.f0), dyn.kappa.plus := HMR.f0] # | |
# | |
# flux.ghg[HMR.kappa < kpp.max, .N] | |
# flux.ghg[,mean(dyn.kappa.plus,na.rm = T)] | |
# plot_ly(data=flux.ghg, x = flux.ghg$HMR.kappa, y = flux.ghg$HMR.phi ) | |
geo_mean <- function(data) { | |
min_data <- min(data,na.rm = T) | |
log_data <- log(data-min_data) | |
gm <- exp(mean(log_data[is.finite(log_data)]))+min_data | |
return(gm) | |
} | |
# ### RF2011old; wrong p-values | |
# flux.ghg.orig <- gasfluxes(fibl.raw, methods = c("linear", "robust linear", "original HMR"), k_HMR = log(1.5/3600),plot = F) # fluxes in nmol-N2O/m²/s | |
# write.table(flux.ghg.orig, "/home/roman/PhD/data/GasFluxGames/Fibl/fibl-n2o-originalHMR-fluxes.csv", sep = ";", row.names = F) | |
# selectfluxes(flux.ghg.orig, select = "RF2011") | |
# flux.ghg.orig[method == "original HMR", .N] | |
# flux.ghg.orig[,mean(flux,na.rm = T)] | |
flux.ghg.DT <- flux.ghg | |
write.table(flux.ghg, "/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-results_detailed2.csv", sep = ";", row.names = F) | |
flux.ghg <- read.table("/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-results_detailed2.csv",sep=";",header = T) | |
# flux.ghg <- read.table("L:/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-results_detailed2.csv",sep=";",header = T) | |
# flux.ghg[,..cols] | |
hist.eval <- function(data, fcs.method, #fcs.parameter = "IQR", | |
fmax = 5, nflux = 25, kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32))), | |
f0 = signif(c(0,exp(seq(-5,log(fmax),length.out=nflux-1))),3)) { | |
# substitute(x) | |
flux.ghg <- as.data.frame(data) | |
# fcs.method <- substitute(kappa.max) | |
# flux.ghg <- read.table("F:/SAE/Matti/Nora/Nora-fluxes-n2o_all.csv",sep = ";",header = T) | |
# # flux.ghg <- read.table("/home/roman/SAE/Matti/Nora/Nora-fluxes-n2o_all.csv",sep = ";",header = T) | |
# fmax <- 5 # 100/36 | |
# nflux <- 25 | |
# kappa <- c(0.0001/36,exp(seq(-11,-3.5,0.32))) | |
# f0 <- signif(c(0,exp(seq(-5,log(fmax),length.out=nflux-1))),3) # exponential f0 scale to have more data on small fluxes | |
# kappa.na <- 0.0001 | |
# kappa.na <- 0.0001/36 # = min(kappa) 1E-6 | |
# flux.ghg$flux.hist <- flux.ghg$rsq.rh | |
# nora.dataset <- data.frame(flux = flux.ghg$linear.f0, kappa= flux.ghg$HMR.kappa) ## prepare dataset for 3d plot combination | |
# nora.dataset$kappa[is.na(nora.dataset$kappa)] <- 0.01 # NA flux values are 0 | |
# simus[ ,nora.kappa:=flux.ghg$HMR.kappa , method] | |
# mean.flux.dataset <- mean(flux.ghg$flux,na.rm=T) | |
### reframe measured data into model space ### | |
flux.ghg$flux.hist <- flux.ghg[,fcs.method] # flux.ghg$linear.f0 ## insert actual method results here!! | |
flux.ghg$flux.hist[is.na(flux.ghg$flux.hist)] <- 0 # NA flux values are 0 | |
flux.ghg$flux.hist[flux.ghg$flux.hist < 0] <- abs(flux.ghg$flux.hist[flux.ghg$flux.hist < 0]) # transform negative to positive fluxes | |
flux.ghg$flux.hist[flux.ghg$flux.hist>fmax] <- fmax ## 100 # fmax 2.8 % of Maikes fluxes are higher than that | |
flux.ghg$kappa.hist <- flux.ghg$HMR.kappa | |
kappa.hist <- kappa | |
flux.ghg$kappa.hist[flux.ghg$HMR.diagnostics=="singulärer Gradient"] <- min(kappa.hist) # lower end of simulation "singular gradient" | |
flux.ghg$kappa.hist[is.na(flux.ghg$kappa.hist)] <- max(kappa.hist)*0.95 # higher end of simulation | |
# flux.ghg$kappa.hist[ flux.ghg$kappa.hist > max(kappa.hist)] <- max(kappa.hist) # higher end of simulation | |
# flux.ghg$kappa.hist[ flux.ghg$kappa.hist < min(kappa.hist)] <- min(kappa.hist) # lower end of simulation | |
# hist(flux.ghg$kappa.hist,40) | |
lysi.gfg <- as.matrix(cbind(flux.ghg$flux.hist,flux.ghg$kappa.hist)) | |
# plot(lysi.gfg[,1],lysi.gfg[,2],xlab="linear f0",ylab="kappa") | |
# plot(log(lysi.gfg[,2]),lysi.gfg[,1],col="blue",ylab="estimated flux",xlab="kappa",pch=20,cex=0.5) | |
# plot(log(flux.ghg$HMR.kappa),flux.ghg$rsq.rh,col="blue",ylab="estimated flux",xlab="kappa",pch=20,cex=0.5) | |
# points(flux.ghg$HMR.f0,lysi.gfg[,2],col="blue") | |
# | |
# # lysi.gfg[order(lysi.gfg[,1]),] | |
x <- abs(lysi.gfg[,1]) # vector with all fluxes | |
y <- lysi.gfg[,2] # vector with all coresponding kappa | |
# my_hist3d <- function(x, y, freq=FALSE, nclass="auto") { | |
n<-length(x) | |
kappa[1] <- 0.000001 | |
# if (nclass == "auto") { nclass<-ceiling(sqrt(nclass.Sturges(x))) } | |
nclass <- length(kappa) | |
# breaks.x <- seq(min(x),max(x),length=(nclass+1)) | |
# breaks.x <- seq(0,fmax,length.out=26)# c(f0,104)-2 | |
breaks.x <- c(f0*0.95,fmax*1.05) # c(f0,104)-2 | |
# breaks.y <- seq(min(y),max(y),length=(nclass+1)) | |
breaks.y <- log(c(kappa*0.95,max(kappa)*1.05)) # c(0.0001,exp(seq(-11,-3.2,0.3))*3600) ; kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32))) | |
h <- NULL | |
for (i in 1:nclass) | |
for (j in 1:nclass) | |
h <- c(h, sum(x <= breaks.x[j+1] & x >= breaks.x[j] & log(y) <= breaks.y[i+1] & log(y) >= breaks.y[i] ) ) | |
# if (freq) h <- h / n | |
# xx <-round(mean(breaks.x[1:2])+(0:(nclass-1))*diff(breaks.x[1:2]), 1) | |
xx <-f0 | |
xx[1] <- 0.000001 | |
# yy <- as.factor(round(mean(breaks.y[1:2])+(0:(nclass-1))*diff(breaks.y[1:2]), 1)) | |
# yy <- (length(breaks.y)-1):1 # as.factor(round(mean(breaks.y[1:2])+(0:(nclass-1))*diff(breaks.y[1:2]), 1)) | |
yy <- 1:(length(breaks.y)-1) # as.factor(round(mean(breaks.y[1:2])+(0:(nclass-1))*diff(breaks.y[1:2]), 1)) | |
res <- cbind(expand.grid(xx,yy), h) | |
# kappa[1] <- 0.04/3600 | |
colnames(res) <- c("f0","kappa","Frequency") | |
res.matrix <- t(data.matrix(dcast(res,kappa~f0))[,-1]) | |
res.matrix <- res.matrix[,-25] | |
colnames(res.matrix) <- as.character(signif(kappa[-25],2)) | |
# hist.results <- mean(flux.ghg[[fcs.method]],na.rm = T) # calculate the mean flux of the dataset | |
hist.results <- geo_mean(flux.ghg[[fcs.method]]) # calculate the mean flux of the dataset | |
hist.results[2] <- table(flux.ghg[[fcs.method]] == flux.ghg$HMR.f0 & !is.na(flux.ghg$HMR.kappa) )[2] # calculate the numer of HMR fluxes used | |
hist.results[3] <- hist.results[1] / geo_mean(flux.ghg$linear.f0) # calcualte change from linear | |
# fcs.parameter = c("IQR","bias","MSE") | |
# for (i in 1:3) { | |
## calculate mean IQ90 | |
iqr <- cbind(expand.grid(xx,kappa), simus.sum[simus.sum$method==fcs.method,IQR]) | |
colnames(iqr) <- c("f0","kappa","IQR") | |
iqr.matrix <- data.matrix(dcast(iqr,kappa~f0))[,-1] | |
iqr.matrix <- iqr.matrix[,-25] | |
iqr.matrix[,24] <- simus.sum$IQR[simus.sum$method=="linear.f0" & simus.sum$kappa > 2e-02] | |
# iqr.matrix[14,18] <- 0.005 | |
colnames(iqr.matrix) <- as.character(signif(kappa[-25],2)) | |
### frequency 2d histogramm ### | |
# res.matrix[1,24] <- 400 | |
tiff(filename = paste("graphs/Histplot-IQR_",fcs.method,".tif",sep=""), | |
width = 1080, height = 800, units = "px", pointsize = 18) | |
hist3D(z = res.matrix, x=1:25, y=log(kappa[-1]), border = "black", xlab = "log(flux)", ylab="log(kappa)", zlab="flux count", | |
plot=T,add=F,colkey=T, zlim=c(0,160),theta=305, colvar= log(iqr.matrix), phi=45, resfac = 3, # zlim=c(0,450), log(SE.tot), # | |
lighting=F,clab="log(IQ90)", clim = c(-7.5,4.5)) # ,col = ramp.col(c("darkgreen","yellow2","red"),alpha = 1)) # | |
dev.off() | |
hist.results[4] <- mean.param <- sum(res.matrix * iqr.matrix) / sum(res.matrix) | |
# cbind(hist.results, ) | |
# } | |
## calculate bias | |
iqr <- cbind(expand.grid(xx,kappa), simus.sum[simus.sum$method==fcs.method,bias]) | |
colnames(iqr) <- c("f0","kappa","bias") | |
iqr.matrix <- data.matrix(dcast(iqr,kappa~f0))[,-1] | |
iqr.matrix <- iqr.matrix[,-25] | |
iqr.matrix[,24] <- simus.sum$bias[simus.sum$method=="linear.f0" & simus.sum$kappa > 2e-02] | |
# iqr.matrix[14,18] <- 0.005 | |
colnames(iqr.matrix) <- as.character(signif(kappa[-25],2)) | |
### frequency 2d histogramm ### | |
# res.matrix[1,24] <- 400 | |
tiff(filename = paste("graphs/Histplot-bias_",fcs.method,".tif",sep=""), | |
width = 1080, height = 800, units = "px", pointsize = 18) | |
hist3D(z = res.matrix, x=1:25, y=log(kappa[-1]), border = "black", xlab = "log(flux)", ylab="log(kappa)", zlab="flux count", | |
plot=T,add=F,colkey=T, zlim=c(0,160),theta=325, colvar=iqr.matrix , phi=38, resfac = 3, # zlim=c(0,450), log(SE.tot), log(iqr.matrix+5) # | |
lighting=F,clab="bias" ,col = ramp.col(c("red","orange","green","blue"),alpha = 1), clim = c(-5,0.5)) # ) # , clim = c(-3,2) | |
dev.off() | |
hist.results[5] <- mean.param <- sum(res.matrix * iqr.matrix) / sum(res.matrix) | |
# cbind(hist.results, ) | |
# } | |
## calcualte MSE | |
iqr <- cbind(expand.grid(xx,kappa), simus.sum[simus.sum$method==fcs.method,MSE]) | |
colnames(iqr) <- c("f0","kappa","MSE") | |
iqr.matrix <- data.matrix(dcast(iqr,kappa~f0))[,-1] | |
iqr.matrix <- iqr.matrix[,-25] | |
iqr.matrix[,24] <- simus.sum$MSE[simus.sum$method=="kappa.max" & simus.sum$kappa <= 1.2e-05] | |
# iqr.matrix[14,18] <- 0.005 | |
colnames(iqr.matrix) <- as.character(signif(kappa[-25],2)) | |
### frequency 2d histogramm ### | |
# res.matrix[1,24] <- 400 | |
tiff(filename = paste("graphs/Histplot-MSE_",fcs.method,".tif",sep=""), | |
width = 1080, height = 800, units = "px", pointsize = 18) | |
hist3D(z = res.matrix, x=1:25, y=log(kappa[-1]), border = "black", xlab = "log(flux)", ylab="log(kappa)", zlab="flux count", | |
plot=T,add=F,colkey=T, zlim=c(0,160),theta=325, colvar= log(iqr.matrix), phi=38, resfac = 3, # zlim=c(0,450), log(SE.tot), # | |
lighting=F,clab="log(MSE)", clim = c(-16.3,3.2)) # ,col = ramp.col(c("darkgreen","yellow2","red"),alpha = 1)) # | |
dev.off() | |
hist.results[6] <- mean.param <- sum(res.matrix * iqr.matrix) / sum(res.matrix) | |
# cbind(hist.results, ) | |
# } | |
hist.results[7] <- fcs.method | |
write.table(t(hist.results), "hist_results.csv", sep = ";", row.names = F, col.names = F, append = T) | |
} | |
cols <- c("linear.f0","robust.linear.f0","HMR.f0","AIC","g.fact","kappa.max","RF2011","RF2017") | |
# for (j in 1:8) hist.eval(flux.ghg, fcs.method = cols[j]) | |
# hist.eval(flux.ghg, fcs.method ="robust.linear.f0") | |
lapply(seq_along(cols), function(i) hist.eval(data = flux.ghg, cols[i])) | |
# purrr::imap(cols, ~ hist.eval(data = flux.ghg, fcs.method = .x)) | |
# | |
library(plotly) | |
p <- plot_ly(x = flux.ghg$flux.hist, y = log(flux.ghg$kappa.hist)) | |
pp <- subplot( | |
p %>% add_markers(alpha = 0.1), | |
p %>% add_histogram2d() | |
) | |
# colkey(clim = range(log(CI.tot2+0.0001)), clab = "log(90%IQR)", add = TRUE, dist = -0.05, length = 0.7, width = 0.7) | |
# text3D(2, -5, 0, labels = expression(theta[1]), add = TRUE, adj = 1) | |
# plotrgl(lighting = TRUE, smooth = TRUE) | |
# | |
# plot.gasfluxes.hist <- function(x, type = "hist", stat = "n") { | |
# | |
# if (type == "hist") { | |
# | |
# p <- ggplot(x, aes_(x = ~f0, y = ~kappa, z = as.name(stat))) + | |
# stat_contour(aes(fill = ..level..), geom = "polygon", bins = 500) + | |
# facet_wrap(~ method) + | |
# scale_fill_distiller(name = sprintf("flux estimate\n(%s)", stat), palette = "RdYlGn") + # BrBG "YlGnBu" RdYlGn" | |
# scale_y_continuous(trans = "log10", labels = function(b) signif(b, 3)) + | |
# theme_bw() | |
# return(print(p)) | |
# | |
# } | |
# | |
# } #more plot types can be implemented |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment