pyroman1337/Gasflux_Simulation

## Gasflux_Simulation
###############################################
######  PAPER # 3 - Gas flux analysis #########
###############################################
# library(devtools)
# install_bitbucket("ecoRoland/gasfluxes")

library(gasfluxes)
library(data.table)
library(ggplot2)
library(plot3D)
library(plotly)


# simulateflux <- function(t, V, A, c0 = 325, fmax = 5, nflux = 1, kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32))),   # fmax = 100/36, kappa was multiplied by 3600
simulateflux <- function(t, V, A, c0 = 325, fmax = 5, nflux = 25, kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32))),   # fmax = 100/36, kappa was multiplied by 3600
                         nMC , GC.sd, g.factor = 4,   f.detect,
                         seed) {
  if (!missing(seed)) set.seed(seed)
  ## model input data ##
  h <- V/A
  # f0 <- signif(seq(0,fmax,length.out = nflux),3)  # all fluxes to be simulated from 0 to fmax;
  # f0 <- signif(exp(seq(0,log(fmax+1),length.out = nflux)),5)-1  # all fluxes to be simulated from 0 to fmax;

  f0 <- signif(c(0,exp(seq(-5,log(fmax),length.out=nflux-1))),3)  # exponential f0 scale to have more data on small fluxes

  # if (missing(f.detect)) {
    # if (length(t) == 4L) {
    #
    #   # f.detect  <- 13.20 * t[4]^(-0.9973) * c0 * GC.sd/c0 * A
    #   # f.detect  <- 0.036  # calculated with GC.sd = 5
    #   f.detect  <- 0.0808  #  0.0808 nmol N2O / m2 / s for GD.sd = 3 (always HMR)
    #   f.detect  <- 0.08000  # nmol N2O / m2 / s for GD.sd = 3 equilibrium
    #
    # } else {
      #simulate f.detect
    f.detect <- 0 # starting value for f.detect
    C00  <- c0 * 273.15 / 22.4 / (273.15 + 15) * 1000  #nmol N2O / m^3
    sdGC <- GC.sd * 273.15 / 22.4 / (273.15 + 15) * 1000  #from ppb GC.SD to nmol N / m^3
    # t =  c(seq(0,01,length.out=4))*60  # c(0,1900,3000,3600)

    for (i in 1:2) {
      set.seed(24)
      # sim <- data.frame(t = seq(0, 1, length.out = length(t)), C = rnorm(4e3, mean = C00, sd = sdGC),
      #                   id = rep(1:1e3, each = length(t)), A = A, V = V)
      sim <- data.frame(t = t, C = rnorm(4e3, mean = C00, sd = sdGC),
                        id = rep(1:1e3, each = length(t)), A = A, V = V)
      simflux <- gasfluxes(sim, .id = "id", .times = "t", methods = c("HMR", "linear"), k_HMR = log(1.5/3600), plot = FALSE)

      simflux[, f0 := linear.f0]  # use linear estimate as default for this method
      # simflux[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec  mean(t)

      # simflux[, kappa.max := abs(linear.f0/f.detect/max(t))]
      # simflux[, kappa.max := linear.f0/f.detect / (t.inf - sum(t)) ] # tfactor (t.inf - sum(t))/3600

      simflux[  !is.na(HMR.f0), f0 := HMR.f0] #
      n.HMR <- simflux[ !is.na(HMR.f0), .N] #

      f.detect <- simflux[, quantile(f0, 0.975)] # 0.0808 nmol N2O / m2 / s for GD.sd = 3 (always HMR)
      write.table(t(c(as.numeric(f.detect),GC.sd,n.HMR)),file="fdetect-out.csv",append=T,col.names = F, sep=";")

      }
      # selectfluxes(res, "RH2017", f.detect = f.detect)
      # res[method == "HMR", .N] #9
      #
      # stop("Detection limit can only be estimated for flux measurements with three or four time points. Please specify the f.detect parameter.")
    # }
  # }

  # Ct.test <- (c0 - fmax/(-tail(kappa, 1) * h)) + fmax * exp(-tail(kappa, 1)*t)/(-tail(kappa, 1)*h)  # in c0 in ppb
  Ct.test <- (C00 - fmax/(-tail(kappa, 1) * h)) + fmax * exp(-tail(kappa, 1)*t)/(-tail(kappa, 1)*h)  # in c0 in ppb
  # neftel.rsq <- signif(summary(lm(Ct.test ~t))$r.squared,5) # calculation of R.lim (minimal R.squared for a given system)

  # prepare input for flux calculation for all simulated fluxes
  df <- data.table(V=V, A=A, time=t)
  df <- df[rep(seq_len(nrow(df)), nMC)]
  df[, mc := rep(seq_len(nMC), each = length(t))]
  df <- df[rep(seq_len(nrow(df)), length(f0) * length(kappa))]
  combs <- CJ(f0 = f0, kappa = kappa)
  df[, c("f0", "kappa") := combs[rep(seq_len(nrow(combs)), each = length(t) * nMC)]]
  df[, phi := c0 - f0/(-kappa*h)]
  df[, C := phi + f0 * exp(-kappa*t)/(-kappa*h) + rnorm(.N, sd = GC.sd)]

  #flux calculation
  message("Calculating ",  nrow(df) / length(t), " fluxes. Please be patient... jaah ganz easy!")
  res <- gasfluxes(df, c("mc", "f0","kappa", "phi"),methods = c("linear", "robust linear", "HMR"), gfactor = g.factor, plot = FALSE,
                   verbose = FALSE,k_HMR= log(1.5/3600))

  # res[, rsq := df[, cor(C, time)^2, by = c("mc", "kappa", "phi")][["V1"]]] #have gasfluxes return rsq instead

  ###############################
  ### decision making section ###
  ###############################

  # RF2011new
  res <- selectfluxes(res, select = "RF2011new")
  setnames(res,"flux","RF2011")#[]
  # ## dynamic R.squared method, dyn.rsq ##
  # f.detect.rh <- res[f0 == 0, quantile(linear.f0, 0.95)]         # detection limit is 95 % quantile of all linear zero fluxes, correct??????
  # res[, rsq.lim := ((3*f.detect.rh)^2 + neftel.rsq*linear.f0^2) / ((3*f.detect.rh)^2 + linear.f0^2)]  # formula that calculates the R² threshold allowing for nonlinear fluxes; reformulated dyn.rsq definition by Roland
  # res[, range(rsq.lim)]
  # res[, dyn.rsq := linear.f0]
  # res[rsq > rsq.lim & !is.na(HMR.f0), dyn.rsq := HMR.f0] # use HMR estimate if HMR was fitted and method requirements are met
  #
   ### implemented in the selectflux function ###
  # res <- selectfluxes(res, "RH2017", f.detect = f.detect, beta = 3, R.lim = neftel.rsq)
  # res[, dyn.Rsq := flux]
  # res <- selectfluxes(res, "RH2017", f.detect = f.detect,
  #                     dyn.Rsquared = function(f.lin, R.lim, f.detect) ((30 * f.detect)^2 + R.lim * f.lin^2)/((30 * f.detect)^2 + f.lin^2)
  #                     )
 # ## minimum detectable flux scheme, f.det using f.detect ##
  # res[, f.det := linear.f0]  # use linear estimate as default for this method
  # res[linear.f0 > 2*f.detect & !is.na(HMR.f0), f.det := HMR.f0]
  #
  ## g-factor scheme, g.fact ##
  res[, g.fact := robust.linear.f0]  # use robust linear estimate as default for this method
  res[abs(HMR.f0/linear.f0)<g.factor & !is.na(HMR.f0), g.fact := HMR.f0] #note the abs!!!
  res[is.na(g.fact), g.fact := linear.f0]  # use linear if robust and HMR dont work

  ## p-value scheme, p.value ##
  # res[, p.value := linear.f0]  # use linear estimate as default for this method
  # res[ HMR.f0.p < linear.f0.p & !is.na(HMR.f0), p.value := HMR.f0] #note the abs!!!
  #
  ## fix Rsq ##
  # res[, Rsq.fix := linear.f0]  # use linear estimate as default for this method
  # res[ 0.8 < (linear.r)^2 & !is.na(HMR.f0), Rsq.fix := HMR.f0] #note the abs!!!

  ## fix kappa ##
  # res[, kappa.fix := linear.f0]  # use linear estimate as default for this method
  # res[ 5/3600 > HMR.kappa & !is.na(HMR.f0), kappa.fix := HMR.f0] #note the abs!!!
  #

  ## dynamic kappa scheme, kappa.max ##
  res[, kappa.max := robust.linear.f0]  # use robust linear estimate as default for this method
  res[, kappa.Max := linear.f0/f.detect/max(t)]
  res[  HMR.kappa  < kappa.Max  & !is.na(HMR.f0), kappa.max := HMR.f0] #
  res[is.na(kappa.max), kappa.max := linear.f0]  # use linear if robust and HMR dont work

  # ## dynamic kappa scheme, kappa.max + AIC ##
  # res[, kappa.AIC := linear.f0]  # use linear estimate as default for this method
  # res[, kappa.AIC := linear.f0/f.detect/max(t)]
  # res[  HMR.kappa  < kappa.AIC  & !is.na(HMR.f0), kappa.AIC := HMR.f0] #

  ## dynamic kappa scheme PLUS, dyn.kpp ##
  # res[, dyn.kpp := linear.f0]  # use linear estimate as default for this method  res[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec  mean(t)
  # res[, t.inf := (HMR.phi-linear.C0)* h / linear.f0] # sec  corrected version
  # # res[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec  by h instead of * h
  # res[, kpp.max := linear.f0 / f.detect / (t.inf- max(t)) ] #      ## tfactor (t.inf - sum(t))/3600
  # # res[, kpp.max := 1 / (t.inf) ] # tfactor (t.inf - sum(t))/3600
  # # res[, kpp.max := abs(linear.f0/f.detect) / (var(t) / max(t)) ]   # variance into t.factor introduced
  # res[ HMR.kappa  < kpp.max  & !is.na(HMR.f0), dyn.kpp := HMR.f0] #


  ## Rolands new scheme, fuss.2017 ##
  res[, RF2017 := robust.linear.f0]  # use linear estimate as default for this method
  res[HMR.kappa < (20/3600) & !is.na(HMR.f0), RF2017 := HMR.f0] # Rolands new criteria explained by email, kappa smaller 5
  res[is.na(RF2017), RF2017 := linear.f0]  # use linear if robust and HMR dont work

  ## Thus I used HMR if it could be fit and kappa was below 20 h^-1 (in practice this resulted in kappa < 5 for all fluxes accepted as non-linear).  ##

  ## AIC scheme ##
  res[, AIC := robust.linear.f0]  # use linear estimate as default for this method
  res[ HMR.AIC < linear.AIC & !is.na(HMR.f0), AIC := HMR.f0] # & HMR.kappa < 20/3600 ;  Rolands new criteria explained by email, kappa smaller 5
  res[is.na(AIC), AIC := linear.f0]  # use linear if robust and HMR dont work


  ## HMR-linear combination scheme, HMR.f0 ##
  res[is.na(HMR.f0), HMR.f0 := linear.f0] # Use linear for HMR if no HMR could be fitted

  res[is.na(robust.linear.f0), robust.linear.f0 := linear.f0]

  class(res) <- c("gasfluxes.simu", class(res))

  res[] #return value
}

###############################
### start actual simulation ###
# simus <- simulateflux(t = c(seq(0,36,length.out=4))/60, V = 0.0085, A = 0.0707, nMC = 10, seed = 42)
# system.time(simus <- simulateflux(t = c(seq(0,60,length.out=4))*60, V = 0.015, A = 0.07, GC.sd = 3, nMC = 25, seed = 88))   # fibl system, takes 3.5 min for nMC = 40
system.time(simus <- simulateflux(t = c(seq(0,36,length.out=4))*60, V = 0.014, A = 0.07, GC.sd = 3, nMC = 100, seed = 12))   # Nora system, takes 3.5 min for nMC = 40

names(simus)[30] <- "dyn.Rsq"
# simus.55.backup <-simus.45.backup
# simus.40.backup <- simus

# simu.rf2011 <- selectfluxes(simus, "RF2011new")
# simus$g.fact <- simu.rf2011$flux

# dyn.Rsquared = function(f.lin, R.lim, f.detect) ((30 * f.detect)^2 + R.lim * f.lin^2)/((30 * f.detect)^2 + f.lin^2)

# simu.dynRsqpls <- selectfluxes(simus, "RH2017", f.detect = f.detect)

#relatively slow:
# system.time(simus <- simulateflux(t = c(seq(0,36,12))/60, V = 0.0085, A = 0.0707, nMC = 100, seed = 42))
#on my (fast) system: 400 seconds

# simus[HMR.f0 > 3.3, HMR.f0 := 3.3] #  ??? what was that good for? ... limit HMR.f0 estimate on 3.3??


summary.gasfluxes.simu <- function(x) {
  x <- melt(x, id.vars = c("mc", "f0", "kappa", "phi"),
            # measure.vars = c("linear.f0","robust.linear.f0","HMR.f0","g.fact","kappa.max","AIC"),
            measure.vars = c("linear.f0","robust.linear.f0","HMR.f0","AIC","g.fact","kappa.max","RF2011","RF2017"),  #
            # measure.vars = c("linear.f0","kappa.max","HMR.f0"),
            variable.name = "method", value.name = "flux.estimate")
  x <- x[, .(median = median(flux.estimate),
        mean = mean(flux.estimate),
        sd = sd(flux.estimate,na.rm = T),
        lwr = quantile(flux.estimate, 0.05,na.rm=T),
        upr = quantile(flux.estimate, 0.95,na.rm=T),
        var = var(flux.estimate,na.rm = T)), by = c("f0", "kappa", "phi", "method")]
  x[, IQR := upr - lwr]
  x[, bias:= mean - f0]
  x[, MSE := bias^2 + var]
  class(x) <- c("gasfluxes.simu.summary", class(x))
  x[]
}
contour.fix <- function(df) {
  colname <- names(df)
  names(df) <- c("x", "y", "z")
  Range <- as.data.frame(sapply(df, range))
  Range[1,1] <- 1E-6
  Dim <- as.data.frame(t(sapply(df, function(x) length(unique(x)))))
  arb_z =  Range$z[1] - diff(Range$z)/Dim$x
  df2 <- rbind(df,
               expand.grid(x = c(exp(log(Range$x[1]) - diff(log(Range$x))/Dim$x), exp(log(Range$x[2]) + diff(log(Range$x))/Dim$x)),
                           y = unique(df$y), z = arb_z),     # seq(Range$y[1], Range$y[2], length = Dim$y)
               expand.grid(x = unique(df$x),  # seq(Range$x[1], Range$x[2], length = Dim$x)
                           y = c(exp(log(Range$y[1]) - diff(log(Range$y))/Dim$y), exp(log(Range$y[2]) + diff(log(Range$y))/Dim$y)), z = arb_z))
  # g <- ggplot(df2, aes(x, y, z = z)) + labs(x = colname[1], y = colname[2], fill = colname[3]) +
  #   stat_contour(geom="polygon", aes(fill=..level..),bins =50) +
  #   coord_cartesian(xlim=c(Range$x), ylim=c(Range$y), expand = F) +
  #  scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) +
  #   theme_bw()
  names(df2) <- c("f0", "kappa", "stat.x")
  return(df2)
}

simus.sum <- summary(simus)
# simus.sumus <- summary.gasfluxes.simu(simus)
# head(simus.sum)
# head(simus.sumus)

# median(simus$dyn.Rsq)

# simus.sum[IQR > 0.5, IQR := 0.5] # Use linear for HMR if no HMR could be fitted
# simus.sum[, IQR := log(IQR)] #  use logarithmic IQR values
# simus.sum[, var := log(var)] #  use logarithmic variance values
# simus.sum[, MSE := log(MSE)] #  use logarithmic MSE values


plot.gasfluxes.simu.mean <- function(x, type = "contour", stat = "median") {
  if (type == "contour") {
    simus.plot <- simus.sum[,contour.fix(.(f0,kappa,median)),method]

    p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) +
      stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) +
      facet_wrap(~ method) +
      scale_fill_distiller(name = sprintf("flux estimate\n(%s)", stat), palette = "BrBG") +  #  BRBG "YlGnBu" RdYlGn"
      scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) +
      coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) +
      theme_bw()
    return(print(p))
  }
}
plot.gasfluxes.simu.mean(simus.sum )

plot.gasfluxes.simu.iqr <- function(x, type = "IQR.plot", stat = "IQ90") {

  if (type == "IQR.plot") {
    simus.plot <- simus.sum[,contour.fix(.(f0,kappa,log(IQR))),method]

    p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) +
      stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) +
      facet_wrap(~ method) +
      scale_fill_distiller(name = sprintf("estimated\n log(%s)", stat), palette = "RdYlGn") +  #  BrBG "YlGnBu" RdYlGn"
      scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) +
      coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) +
      theme_bw()
    # annotate("text", x = 2, y=1E-5, label = c("blablab"))
    return(print(p))
  }
}  #more plot types can be implemented
plot.gasfluxes.simu.iqr(simus.sum)

plot.gasfluxes.simu.var <- function(x, type = "var.plot", stat = "var") {

  if (type == "var.plot") {
    simus.plot <- simus.sum[,contour.fix(.(f0,kappa,log(var))),method]

    p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) +
      stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) +
      facet_wrap(~ method) +
      scale_fill_distiller(name = sprintf("estimated\n log(%s)", stat), palette = "RdYlGn") +  #  BrBG "YlGnBu" RdYlGn"
      scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) +
      coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) +
      theme_bw()
    # annotate("text", x = 2, y=1E-5, label = c("blablab"))
    return(print(p))
  }
}  #more plot types can be implemented
plot.gasfluxes.simu.var(simus.sum)

plot.gasfluxes.simu.var <- function(x, type = "var.plot", stat = "MSE") {

  if (type == "var.plot") {
    simus.plot <- simus.sum[,contour.fix(.(f0,kappa,log(MSE))),method]

    p <- ggplot(simus.plot, aes_(x = ~f0, y = ~kappa, z = ~stat.x)) +
      stat_contour(aes(fill = ..level..), geom = "polygon", bins = 128) +
      facet_wrap(~ method) +
      scale_fill_distiller(name = sprintf("estimated\n log(%s)", stat), palette = "RdYlGn") +  #  BrBG "YlGnBu" RdYlGn"
      scale_y_log10(breaks = c(1E-2,1E-3,1E-4,1E-5)) +
      coord_cartesian(xlim = range(simus.sum$f0), ylim= range(simus.sum$kappa), expand = F) +
      theme_bw()
    # annotate("text", x = 2, y=1E-5, label = c("blablab"))
    return(print(p))
  }
}  #more plot types can be implemented
plot.gasfluxes.simu.var(simus.sum)


simus.sum[,mean(exp(IQR)),method]  # average IQR for each method

simus.sum[f0 == 0, .(quantile(mean,0.975)), method]  # detection limit for each method


###  ----
# plot(nora.dataset$flux,log(nora.dataset$kappa), xlim = c(0,2.78), ylim = c(1E-5,1E-2),col="black")
# plot.gasfluxes.hist(simus.sum)

# simus.sum[simus.sum$method=="dyn.Rsq" & simus.sum$f0 == 0.333 ]

simus.sum.backup <- simus.sum
#
# simus.sum.tmax13 <- simus.sum
#
#
# simus.60 <- simus.sum.tmax60[method=="kappa.max"]
# simus.60[,method:="t.max = 60 min"]
# simus.45 <- simus.sum.tmax45[method=="kappa.max"]
# simus.45[,method:="t.max = 45 min"]
# simus.30 <- simus.sum.tmax30[method=="kappa.max"]
# simus.30[,method:="t.max = 30 min"]
# simus.12 <- simus.sum.tmax12[method=="kappa.max"]
# simus.12[,method:="t.max = 12 min"]
# simus.75 <- simus.sum.tmax75[method=="kappa.max"]
# simus.75[,method:="t.max = 75 min"]
# simus.90 <- simus.sum.tmax90[method=="kappa.max"]
# simus.90[,method:="t.max = 90 min"]


# simus.GCsd <- rbind(simus.15,simus.09,simus.06,simus.03,simus.01)
# simus.GCsd <- rbind(simus.13,simus.30,simus.45,simus.60,simus.75,simus.90)
# simus.sum <- simus.GCsd
# plot.gasfluxes.simu.iqr(simus.sum)

###########################################
#### density frequence comparision ########
###########################################
# load a GHG flux dataset...
# lysi.gfg <- read.table("gfg_Lysi.csv",sep=",",header=T)
## add fibl data
# fibl.raw <- read.table("/home/roman/PhD/data/GasFluxGames/Fibl/N2O_Rohdaten.csv",header=T, sep = ",")
# fibl.raw <- read.table("/home/hueppir/nashome/Gasflux-Simulations/GasFluxGames/Fibl/N2O_Rohdaten.csv",header=T, sep = ",")
fibl.raw <- fread("/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/N2O_Rohdaten.csv",header=T, sep = ",")
fibl.raw$time <- fibl.raw$time * 3600  # we use seconds instead of hours

# art.raw <- fread("/home/hueppir/PhD/data/Reckenholz2014/fluxes-n2o/n2o-input-complete.csv",header=T,sep = ",")
# art.raw$V <- art.raw$V / 1000
# fibl.raw <- art.raw
fibl.raw$C <- fibl.raw$C * 1E3  / 22.4136 * (273.15/(15 + 273.15))  # n mol N2O/m^3 ;22.4L/mol;273K,

# flux.ghg <- gasfluxes(fibl.raw, methods = c("linear", "robust linear", "HMR"), k_HMR = log(1.5/3600),plot = T)   # fluxes in nmol-N2O/m²/s
flux.ghg <- gasfluxes(na.omit(fibl.raw),
                       methods = c("linear", "robust linear", "HMR"),
                       .id = c("ID", "V", "A"),
                       .V = "V", .A = "A", .times = "time", .C = "C",  k_HMR = log(1.5/3600),plot = F)   # fluxes in nmol-N2O/m²/s

# plot_ly(flux.ghg,x = flux.ghg$linear.C0, y = flux.ghg$HMR.phi)


# plot_ly(data=flux.ghg, x = flux.ghg$HMR.kappa, y = flux.ghg$t.phi )

# write.table(flux.ghg,  "/home/roman/PhD/data/GasFluxGames/Fibl/fibl-n2o-fluxes.csv", sep = ";", row.names = F)
write.table(flux.ghg,  "/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-fluxes_detailed.csv", sep = ";", row.names = F)
# write.table(flux.ghg,  "/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Reckenholz/art-n2o-fluxes_detailed.csv", sep = ";", row.names = F)
flux.ghg.backup <- flux.ghg

flux.ghg <- fread("/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-fluxes_detailed.csv")

# always HMR if fitted
flux.ghg[is.na(HMR.f0), HMR.f0 := linear.f0] # Use linear for HMR if no HMR could be fitted
flux.ghg[is.na(HMR.f0), HMR.f0 := robust.linear.f0]
flux.ghg[is.na(robust.linear.f0) & is.na(HMR.f0.se),HMR.f0 := linear.f0]
flux.ghg[HMR.f0 > 5000,  HMR.f0 := linear.f0]  ## avoid the very large HMR fluxes


### kappa.max method
f.detect <- 0.069   # from calculation   0.028
t <- c(seq(0,36,length.out=4))*60

flux.ghg[, kappa.max := robust.linear.f0]  # use linear estimate as default for this method
flux.ghg[is.na(kappa.max), kappa.max := linear.f0]  # use linear estimate as default for this method
flux.ghg[is.na(robust.linear.f0) & is.na(HMR.f0.se),kappa.max := linear.f0]
flux.ghg[, kappa.dyn := abs(linear.f0/f.detect/max(t))]
flux.ghg[  HMR.kappa  < kappa.dyn  & !is.na(HMR.f0.se), kappa.max := HMR.f0] #

# flux.ghg[HMR.kappa  < kappa.max & !is.na(HMR.f0.se), .N]
# flux.ghg[,mean(dyn.kappa,na.rm = T)]

### AIC method
flux.ghg[, AIC := robust.linear.f0]  # use linear estimate as default for this method
flux.ghg[  HMR.AIC  < linear.AIC  & !is.na(HMR.f0.se), AIC := HMR.f0] #
flux.ghg[is.na(robust.linear.f0.se) & is.na(HMR.f0.se),AIC := linear.f0]

# flux.ghg[ HMR.AIC  < linear.AIC & !is.na(HMR.f0.se), .N]
# flux.ghg[,mean(AIC,na.rm = T)]

### g.fact
flux.ghg[, g.fact := robust.linear.f0]  # use linear estimate as default for this method
flux.ghg[HMR.f0/robust.linear.f0 < 4 & !is.na(HMR.f0.se), g.fact := HMR.f0] # Rolands new criteria explained by email, kappa smaller 5
flux.ghg[is.na(g.fact), g.fact := linear.f0] # Use linear for HMR if no HMR could be fitted
# flux.ghg[HMR.f0/robust.linear.f0 < 4 & !is.na(HMR.f0.se) , .N]
# flux.ghg[,mean(g.fact,na.rm = T)]

## Rolands new scheme, fuss.2017 ##
max.kp <- 20
flux.ghg[, RF2017 := robust.linear.f0]  # use linear estimate as default for this method
flux.ghg[HMR.kappa < (max.kp/3600) & !is.na(HMR.f0.se), RF2017 := HMR.f0] # Rolands new criteria explained by email, kappa smaller 5
flux.ghg[is.na(RF2017), RF2017 := linear.f0] # Use linear for HMR if no HMR could be fitted
# flux.ghg[HMR.kappa  < max.kp/3600 & !is.na(HMR.f0.se), .N]
# flux.ghg[,mean(fuss.2017,na.rm = T)]

### RF2011new; bad p-value criteria
selectfluxes(flux.ghg, select = "RF2011new")
setnames(flux.ghg,"flux","RF2011")[]

### Robust linear use linear if only 3 data points
flux.ghg[is.na(robust.linear.f0), robust.linear.f0 := linear.f0] # Use linear for HMR if no HMR could be fitted
# flux.ghg$robust.linear.f0[is.na(flux.ghg$robust.linear.f0)] <- flux.ghg$linear.f0[is.na(flux.ghg$robust.linear.f0)]
# flux.ghg[method == "HMR", .N]
# flux.ghg[,mean(flux,na.rm = T)]

# method.tab$mean[3] <- flux.ghg[,mean(HMR.f0,na.rm = T)]
# flux.ghg[!is.na(HMR.AIC), .N]


# fibl.melt <- melt(flux.ghg, id.vars = c("ID"),
#           measure.vars = c("linear.f0", "HMR.f0","robust.linear.f0"),
#           variable.name = "method", value.name = "flux.estimate")
# fibl.results <- fibl.melt[,.(mean.flux=mean(flux.estimate,na.rm = T)),method]

# flux.ghg <- flux.ghg.backup
# flux.ghg <- fread("/home/hueppir/nashome/Gasflux-Simulations/GasFluxGames/Reckenholz/art-n2o-fluxes_detailed.csv")
# selectfluxes(flux.ghg, select = "RH2017", R.lim =  0.6, f.detect = 0.067)
# flux.ghg <- selectfluxes(flux.ghg, select = "RH2017", R.lim =  0.352, f.detect = 0.39)
# method.tab <- data.table(methods = c("linear","robust linear","HMR","AIC","g-factor","kappa.max","RF2011","RF2017"))

# matrix(NA, nrow = 8, ncol = 6)
# row.names(method.tab) <- c("linear","robust linear","HMR","AIC","g-factor","kappa.max","RF2011","RF2017")
# names(method.tab)  <- c("mean","n HMR","n linear","nonlinear/linear","fdet","MSE")

# method.tab$mean <- NA
# method.tab$mean[1] <- flux.ghg[,mean(linear.f0)]
# method.tab$mean[2] <- flux.ghg[,mean(robust.linear.f0,na.rm=T)]


# flux.ghg[, .(mean = mean)]
# flux.ghg[method == "HMR", .N]
 # flux.ghg[,mean(flux,na.rm = T)]
 # HMR method, robust.linear alternatively


 # method.tab$nonlinear.linear <- NA
 # method.tab$bias <- NA
 # method.tab$IQ90 <- NA
 # method.tab$MSE <- NA
 # flux.ghg[is.na(robust.linear.f0),.N]
 # flux.ghg[is.na(robust.linear.f0),robust.linear.f0 := linear.f0]
# method.tab$bias <- flux.ghg[,mean(robust.linear.f0,na.rm = T)]


 # h <- 0.2
 # f.detect <- 0.04


 # ### kpp.max (dyn.kappa) method that is more restrictive and uses t.meas equal to t.phi-t.meas
 # flux.ghg[, dyn.kappa.plus := robust.linear.f0]  # use linear estimate as default for this method
 # flux.ghg[is.na(dyn.kappa.plus), dyn.kappa.plus := linear.f0]  # use linear estimate as default for this method
 # flux.ghg[, t.inf := (HMR.phi-linear.C0)/linear.f0/h ] # sec  mean(t)
 # flux.ghg[, kpp.max := linear.f0/f.detect / (t.inf - sum(t)) ] # tfactor (t.inf - sum(t))/3600
 # # res[, kpp.max := abs(linear.f0/f.detect) / (var(t) / max(t)) ]   # variance into t.factor introduced
 # flux.ghg[ HMR.kappa  < kpp.max  & !is.na(HMR.f0), dyn.kappa.plus := HMR.f0] #
 #
 # flux.ghg[HMR.kappa  < kpp.max, .N]
 # flux.ghg[,mean(dyn.kappa.plus,na.rm = T)]
 # plot_ly(data=flux.ghg, x = flux.ghg$HMR.kappa, y = flux.ghg$HMR.phi )

 geo_mean <- function(data) {

       min_data <- min(data,na.rm = T)
       log_data <- log(data-min_data)
       gm <- exp(mean(log_data[is.finite(log_data)]))+min_data
       return(gm)
   }


 # ### RF2011old; wrong p-values
 # flux.ghg.orig <- gasfluxes(fibl.raw, methods = c("linear", "robust linear", "original HMR"), k_HMR = log(1.5/3600),plot = F)   # fluxes in nmol-N2O/m²/s
 # write.table(flux.ghg.orig,  "/home/roman/PhD/data/GasFluxGames/Fibl/fibl-n2o-originalHMR-fluxes.csv", sep = ";", row.names = F)
 # selectfluxes(flux.ghg.orig, select = "RF2011")
 # flux.ghg.orig[method == "original HMR", .N]
 # flux.ghg.orig[,mean(flux,na.rm = T)]
flux.ghg.DT <- flux.ghg

write.table(flux.ghg, "/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-results_detailed2.csv", sep = ";", row.names = F)


flux.ghg <- read.table("/home/hueppir/naslocal/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-results_detailed2.csv",sep=";",header = T)
# flux.ghg <- read.table("L:/Gasflux-Simulations/GasFluxGames/Fibl/fibl-n2o-results_detailed2.csv",sep=";",header = T)


# flux.ghg[,..cols]

hist.eval <- function(data, fcs.method, #fcs.parameter = "IQR",
                      fmax = 5, nflux = 25, kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32))),
                      f0 = signif(c(0,exp(seq(-5,log(fmax),length.out=nflux-1))),3)) {
  # substitute(x)
flux.ghg <- as.data.frame(data)
# fcs.method <- substitute(kappa.max)
# flux.ghg <- read.table("F:/SAE/Matti/Nora/Nora-fluxes-n2o_all.csv",sep = ";",header = T)
# # flux.ghg <- read.table("/home/roman/SAE/Matti/Nora/Nora-fluxes-n2o_all.csv",sep = ";",header = T)
# fmax <- 5    # 100/36
# nflux <- 25
# kappa <- c(0.0001/36,exp(seq(-11,-3.5,0.32)))
# f0 <- signif(c(0,exp(seq(-5,log(fmax),length.out=nflux-1))),3)  # exponential f0 scale to have more data on small fluxes

# kappa.na <- 0.0001
# kappa.na <- 0.0001/36  # = min(kappa) 1E-6
# flux.ghg$flux.hist <- flux.ghg$rsq.rh
# nora.dataset  <- data.frame(flux = flux.ghg$linear.f0, kappa= flux.ghg$HMR.kappa)  ## prepare dataset for 3d plot combination
# nora.dataset$kappa[is.na(nora.dataset$kappa)] <-  0.01  # NA flux values are 0
# simus[ ,nora.kappa:=flux.ghg$HMR.kappa , method]

# mean.flux.dataset <- mean(flux.ghg$flux,na.rm=T)

### reframe measured data into model space ###
flux.ghg$flux.hist <-  flux.ghg[,fcs.method]  #    flux.ghg$linear.f0   ## insert actual method results here!!

flux.ghg$flux.hist[is.na(flux.ghg$flux.hist)] <- 0  # NA flux values are 0
flux.ghg$flux.hist[flux.ghg$flux.hist < 0] <- abs(flux.ghg$flux.hist[flux.ghg$flux.hist < 0]) # transform negative to positive fluxes
flux.ghg$flux.hist[flux.ghg$flux.hist>fmax] <- fmax  ## 100   # fmax  2.8 % of Maikes fluxes are higher than that

flux.ghg$kappa.hist <- flux.ghg$HMR.kappa
kappa.hist <-  kappa
flux.ghg$kappa.hist[flux.ghg$HMR.diagnostics=="singulärer Gradient"] <- min(kappa.hist) # lower end of simulation "singular gradient"

flux.ghg$kappa.hist[is.na(flux.ghg$kappa.hist)] <- max(kappa.hist)*0.95 # higher end of simulation

# flux.ghg$kappa.hist[ flux.ghg$kappa.hist > max(kappa.hist)] <- max(kappa.hist) # higher end of simulation
# flux.ghg$kappa.hist[ flux.ghg$kappa.hist < min(kappa.hist)] <- min(kappa.hist) # lower end of simulation

# hist(flux.ghg$kappa.hist,40)
lysi.gfg <- as.matrix(cbind(flux.ghg$flux.hist,flux.ghg$kappa.hist))

# plot(lysi.gfg[,1],lysi.gfg[,2],xlab="linear f0",ylab="kappa")
# plot(log(lysi.gfg[,2]),lysi.gfg[,1],col="blue",ylab="estimated flux",xlab="kappa",pch=20,cex=0.5)
# plot(log(flux.ghg$HMR.kappa),flux.ghg$rsq.rh,col="blue",ylab="estimated flux",xlab="kappa",pch=20,cex=0.5)

# points(flux.ghg$HMR.f0,lysi.gfg[,2],col="blue")
#
# # lysi.gfg[order(lysi.gfg[,1]),]

x <- abs(lysi.gfg[,1])  # vector with all fluxes
y <- lysi.gfg[,2]  # vector with all coresponding kappa

# my_hist3d <- function(x, y, freq=FALSE, nclass="auto") {
n<-length(x)
kappa[1] <- 0.000001
# if (nclass == "auto") { nclass<-ceiling(sqrt(nclass.Sturges(x))) }
nclass <- length(kappa)
# breaks.x <- seq(min(x),max(x),length=(nclass+1))
# breaks.x <- seq(0,fmax,length.out=26)# c(f0,104)-2
breaks.x <- c(f0*0.95,fmax*1.05)    # c(f0,104)-2

# breaks.y <- seq(min(y),max(y),length=(nclass+1))
breaks.y <- log(c(kappa*0.95,max(kappa)*1.05))  # c(0.0001,exp(seq(-11,-3.2,0.3))*3600) ; kappa = c(0.0001/36,exp(seq(-11,-3.5,0.32)))
h <- NULL
for (i in 1:nclass)
  for (j in 1:nclass)
    h <- c(h, sum(x <= breaks.x[j+1] & x >= breaks.x[j] & log(y) <= breaks.y[i+1] & log(y) >= breaks.y[i] ) )
# if (freq) h <- h / n
# xx <-round(mean(breaks.x[1:2])+(0:(nclass-1))*diff(breaks.x[1:2]), 1)
xx <-f0
xx[1] <-  0.000001
# yy <- as.factor(round(mean(breaks.y[1:2])+(0:(nclass-1))*diff(breaks.y[1:2]), 1))
# yy <- (length(breaks.y)-1):1  # as.factor(round(mean(breaks.y[1:2])+(0:(nclass-1))*diff(breaks.y[1:2]), 1))
yy <- 1:(length(breaks.y)-1)  # as.factor(round(mean(breaks.y[1:2])+(0:(nclass-1))*diff(breaks.y[1:2]), 1))
res <- cbind(expand.grid(xx,yy), h)
# kappa[1] <- 0.04/3600
colnames(res) <- c("f0","kappa","Frequency")
res.matrix <- t(data.matrix(dcast(res,kappa~f0))[,-1])
res.matrix <- res.matrix[,-25]
colnames(res.matrix) <- as.character(signif(kappa[-25],2))


# hist.results    <-  mean(flux.ghg[[fcs.method]],na.rm = T)                                                # calculate the mean flux of the dataset
hist.results    <-  geo_mean(flux.ghg[[fcs.method]])                                                # calculate the mean flux of the dataset
hist.results[2] <- table(flux.ghg[[fcs.method]] == flux.ghg$HMR.f0 & !is.na(flux.ghg$HMR.kappa) )[2]    # calculate the numer of HMR fluxes used
hist.results[3] <- hist.results[1] / geo_mean(flux.ghg$linear.f0)                                   # calcualte change from linear

# fcs.parameter = c("IQR","bias","MSE")
# for (i in 1:3)  {

## calculate mean IQ90
iqr <- cbind(expand.grid(xx,kappa), simus.sum[simus.sum$method==fcs.method,IQR])
colnames(iqr) <- c("f0","kappa","IQR")
iqr.matrix <- data.matrix(dcast(iqr,kappa~f0))[,-1]
iqr.matrix <- iqr.matrix[,-25]
iqr.matrix[,24] <- simus.sum$IQR[simus.sum$method=="linear.f0" & simus.sum$kappa > 2e-02]
# iqr.matrix[14,18] <- 0.005
colnames(iqr.matrix) <- as.character(signif(kappa[-25],2))

### frequency 2d histogramm ###
# res.matrix[1,24] <- 400
tiff(filename = paste("graphs/Histplot-IQR_",fcs.method,".tif",sep=""),
     width = 1080, height = 800, units = "px", pointsize = 18)
hist3D(z = res.matrix, x=1:25, y=log(kappa[-1]), border = "black", xlab = "log(flux)", ylab="log(kappa)", zlab="flux count",
                   plot=T,add=F,colkey=T, zlim=c(0,160),theta=305, colvar= log(iqr.matrix), phi=45, resfac = 3, # zlim=c(0,450), log(SE.tot),        #
                   lighting=F,clab="log(IQ90)", clim = c(-7.5,4.5))  #   ,col = ramp.col(c("darkgreen","yellow2","red"),alpha = 1)) #
dev.off()

hist.results[4] <- mean.param <- sum(res.matrix * iqr.matrix) / sum(res.matrix)
# cbind(hist.results, )
# }

## calculate bias
iqr <- cbind(expand.grid(xx,kappa), simus.sum[simus.sum$method==fcs.method,bias])
colnames(iqr) <- c("f0","kappa","bias")
iqr.matrix <- data.matrix(dcast(iqr,kappa~f0))[,-1]
iqr.matrix <- iqr.matrix[,-25]
iqr.matrix[,24] <- simus.sum$bias[simus.sum$method=="linear.f0" & simus.sum$kappa > 2e-02]
# iqr.matrix[14,18] <- 0.005
colnames(iqr.matrix) <- as.character(signif(kappa[-25],2))

### frequency 2d histogramm ###
# res.matrix[1,24] <- 400
tiff(filename = paste("graphs/Histplot-bias_",fcs.method,".tif",sep=""),
     width = 1080, height = 800, units = "px", pointsize = 18)
hist3D(z = res.matrix, x=1:25, y=log(kappa[-1]), border = "black", xlab = "log(flux)", ylab="log(kappa)", zlab="flux count",
       plot=T,add=F,colkey=T, zlim=c(0,160),theta=325, colvar=iqr.matrix , phi=38, resfac = 3, # zlim=c(0,450), log(SE.tot), log(iqr.matrix+5)       #
       lighting=F,clab="bias" ,col = ramp.col(c("red","orange","green","blue"),alpha = 1), clim = c(-5,0.5)) # )  #    , clim = c(-3,2)
dev.off()

hist.results[5] <- mean.param <- sum(res.matrix * iqr.matrix) / sum(res.matrix)
# cbind(hist.results, )
# }

## calcualte MSE
iqr <- cbind(expand.grid(xx,kappa), simus.sum[simus.sum$method==fcs.method,MSE])
colnames(iqr) <- c("f0","kappa","MSE")
iqr.matrix <- data.matrix(dcast(iqr,kappa~f0))[,-1]
iqr.matrix <- iqr.matrix[,-25]
iqr.matrix[,24] <- simus.sum$MSE[simus.sum$method=="kappa.max" & simus.sum$kappa <= 1.2e-05]
# iqr.matrix[14,18] <- 0.005
colnames(iqr.matrix) <- as.character(signif(kappa[-25],2))

### frequency 2d histogramm ###
# res.matrix[1,24] <- 400
tiff(filename = paste("graphs/Histplot-MSE_",fcs.method,".tif",sep=""),
     width = 1080, height = 800, units = "px", pointsize = 18)
hist3D(z = res.matrix, x=1:25, y=log(kappa[-1]), border = "black", xlab = "log(flux)", ylab="log(kappa)", zlab="flux count",
       plot=T,add=F,colkey=T, zlim=c(0,160),theta=325, colvar= log(iqr.matrix), phi=38, resfac = 3, # zlim=c(0,450), log(SE.tot),        #
       lighting=F,clab="log(MSE)", clim = c(-16.3,3.2))  #   ,col = ramp.col(c("darkgreen","yellow2","red"),alpha = 1)) #
dev.off()

hist.results[6] <- mean.param <- sum(res.matrix * iqr.matrix) / sum(res.matrix)
# cbind(hist.results, )
# }
hist.results[7] <- fcs.method

write.table(t(hist.results), "hist_results.csv", sep = ";", row.names = F, col.names = F, append = T)

}

cols <- c("linear.f0","robust.linear.f0","HMR.f0","AIC","g.fact","kappa.max","RF2011","RF2017")

# for (j in 1:8) hist.eval(flux.ghg, fcs.method = cols[j])

# hist.eval(flux.ghg, fcs.method ="robust.linear.f0")

lapply(seq_along(cols), function(i) hist.eval(data = flux.ghg, cols[i]))

# purrr::imap(cols, ~ hist.eval(data = flux.ghg, fcs.method = .x))

#


library(plotly)
p <- plot_ly(x = flux.ghg$flux.hist, y = log(flux.ghg$kappa.hist))
pp <- subplot(
  p %>% add_markers(alpha = 0.1),
  p %>% add_histogram2d()
)
# colkey(clim = range(log(CI.tot2+0.0001)), clab = "log(90%IQR)", add = TRUE, dist = -0.05, length = 0.7, width = 0.7)
# text3D(2, -5, 0, labels = expression(theta[1]), add = TRUE, adj = 1)
# plotrgl(lighting = TRUE, smooth = TRUE)
#
# plot.gasfluxes.hist <- function(x, type = "hist", stat = "n") {
#
#   if (type == "hist") {
#
#     p <- ggplot(x, aes_(x = ~f0, y = ~kappa, z = as.name(stat))) +
#       stat_contour(aes(fill = ..level..), geom = "polygon", bins = 500) +
#       facet_wrap(~ method) +
#       scale_fill_distiller(name = sprintf("flux estimate\n(%s)", stat), palette = "RdYlGn") +  #  BrBG "YlGnBu" RdYlGn"
#       scale_y_continuous(trans = "log10", labels = function(b) signif(b, 3)) +
#       theme_bw()
#     return(print(p))
#
#   }
#
# }  #more plot types can be implemented