metamaden/pwrEWAS_itable.R

## pwrEWAS_itable.R
#!/usr/bin/env R

# Author: Sean Maden
#
# *** Description ***
# This is a sligthly modified form of the pwrEWAS power analysis function from the pwrEWAS R package.
#
# Source this function from an active R session using:
# > devtools::source_gist("https://gist.github.com/metamaden/953b128c230e32ef1beecbc2695e93c1")
#
# *** References ***
#
# 1. This code was used in the manuscript:
#
# Maden SK* , Walsh B, Ellrott K, Hansen KD, Thompson RF, Nellore A. “Recountmethylation enables flexible
# analysis of public blood DNA methylation array data.” Bioinformatics Advances 3, 1. 2023.
#
# 2. Original source code for pwrEWAS() function:
# https://github.com/stefangraw/pwrEWAS/blob/master/R/pwrEWAS_v1.8.R
#
#
#

#' @title pwrEWAS_itable - A computationally efficient tool for comprehensive power estimation in EWAS
#'
#' @description pwrEWAS is a computationally efficient tool to estimate power in EWAS as a function of sample and effect size for two-group comparisons of DNAm (e.g., case vs control, exposed vs non-exposed, etc.). Detailed description of in-/outputs, instructions and an example, as well as interpretations of the example results are provided in the vignette: vignette("pwrEWAS")
#'
#' @param tissueType Tissue type DNA methylation data, containing columns "mu" for Beta-value means, and "var" for Beta-value variances.
#' @param minTotSampleSize Minimum total sample size.
#' @param maxTotSampleSize Maximum total sample size.
#' @param SampleSizeSteps Sample size increments.
#' @param NcntPer Percentage sample group 1 (control group) (NcntPer = 0.5 indicates a balanced design).
#' @param targetDelta Target maximum difference in mean DNAm. (Either 'targetDelta' or 'deltaSD' should be specified)
#' @param deltaSD Standard deviation of simulated differences. (Either 'targetDelta' or 'deltaSD' should be specified)
#' @param J Number of CpGs tested/simulated (default: 100000).
#' @param targetDmCpGs Target number of DM CpGs.
#' @param detectionLimit Smallest detectable difference in DNAm (default: 0.01).
#' @param DMmethod Method of Differential Methylation analysis: "limma" (default), "t-test (unequal var)", "t-test (equal var)", "Wilcox rank sum", "CPGassoc".
#' @param FDRcritVal FDRcritVal (default: 0.05).
#' @param core Number of threads for multi-threading (default: 1).
#' @param sims Number of simulated data sets (default: 50).
#'
#' @return pwrEWAS will return an object with the following four attributes: meanPower, powerArray, deltaArray, and metric, where metric contains marTypeI, classicalPower, FDR, and FDC
#'
#' @keywords DNAm microarray power
#'
#' @export
#'
#' @examples
#' library(minfiData)
#' data("MsetEx") # load MethylSet
#' ms <- MsetEx
#' bval <- getBeta(ms) # get DNAm fractions
#' # get the summary data frame
#' dfpwr <- data.frame(mu = rowMeans(bval, na.rm = T),
#'                    var = rowVars(bval, na.rm = T))
#'
#' # get power analysis results
#' outDelta <- pwrEWAS_itable(
#'     tissueType = dfpwr,
#'     minTotSampleSize = 10,
#'     maxTotSampleSize = 20,
#'     SampleSizeSteps = 10,
#'     NcntPer = 0.5,
#'     targetDelta = c(0.2, 0.5),
#'     J = 1000,
#'     targetDmCpGs = 10,
#'     detectionLimit = 0.01,
#'     DMmethod = "limma",
#'     FDRcritVal = 0.05,
#'     core = 2,
#'     sims = 30)
#'
#' outSD <- pwrEWAS(
#'     tissueType = dfpwr,
#'     minTotSampleSize = 10,
#'     maxTotSampleSize = 20,
#'     SampleSizeSteps = 10,
#'     NcntPer = 0.5,
#'     deltaSD = c(0.02, 0.03),
#'     J = 1000,
#'     targetDmCpGs = 10,
#'     detectionLimit = 0.01,
#'     DMmethod = "limma",
#'     FDRcritVal = 0.05,
#'     core = 2,
#'     sims = 30)
pwrEWAS_itable <- function(
    tissueType, # dna methylation data means and variances
    minTotSampleSize, # min total sample size
    maxTotSampleSize, # max total sample size
    SampleSizeSteps, # steps for increasing total sample size
    NcntPer, # percentage of control sample
    targetDelta = NULL, # vector of 99 percentile of the target max DM
    deltaSD = NULL, # vector of sd(delta)
    J = 100000, # number of simulated CpGs
    targetDmCpGs, # target number for truely differentially methylated CpG
    detectionLimit = 0.01, # lower bound for differential methylated CpG's to be considered TRULY differential methylated
    DMmethod = c("limma", "t-test (unequal var)", "t-test (equal var)", "Wilcox rank sum", "CPGassoc"), # method to detect differential methylation
    FDRcritVal = 0.05,
    core = 1, # number of cores to multi thread
    sims = 50
){
  methPara <- tissueType
  DMmethod <- match.arg(DMmethod)

  if(!is.null(targetDelta) & !is.null(deltaSD)) stop("Please specify only one: 'targetDelta' or 'deltaSD'")

  #load data
  #methPara <- pwrEWAS.data:::loadDataset(tissueType)
  # methPara <-  ExperimentHub::ExperimentHub()[["EH3078"]]
  CpGonArray <- length(methPara$mu)

  output <- NULL
  totSampleSizes <- seq(minTotSampleSize, maxTotSampleSize, SampleSizeSteps)

  # initalize multi thread clusters
  cl <- parallel::makeCluster(core) # multi core
  doSNOW::registerDoSNOW(cl)
  # doParallel::registerDoParallel(cl) # multi core

  # combining function for foreach loops
  combine_tau <- function(listA, listB){
    if(is.null(listA)) return(listB)
    if(!methods::is(listA[["power"]], "array") & !methods::is(listA[["power"]], "matrix")) listA[["power"]] <- matrix(listA[["power"]])
    if(!methods::is(listB[["power"]], "array") & !methods::is(listB[["power"]], "matrix")) listB[["power"]] <- matrix(listB[["power"]])
    if(!methods::is(listA[["metric"]]$marTypeI, "array") & !methods::is(listA[["metric"]]$marTypeI, "matrix")) listA[["metric"]]$marTypeI <- matrix(listA[["metric"]]$marTypeI)
    if(!methods::is(listB[["metric"]]$marTypeI, "array") & !methods::is(listB[["metric"]]$marTypeI, "matrix")) listB[["metric"]]$marTypeI <- matrix(listB[["metric"]]$marTypeI)
    if(!methods::is(listA[["metric"]]$classicalPower, "array") & !methods::is(listA[["metric"]]$classicalPower, "matrix")) listA[["metric"]]$classicalPower <- matrix(listA[["metric"]]$classicalPower)
    if(!methods::is(listB[["metric"]]$classicalPower, "array") & !methods::is(listB[["metric"]]$classicalPower, "matrix")) listB[["metric"]]$classicalPower <- matrix(listB[["metric"]]$classicalPower)
    if(!methods::is(listA[["metric"]]$FDR, "array") & !methods::is(listA[["metric"]]$FDR, "matrix")) listA[["metric"]]$FDR <- matrix(listA[["metric"]]$FDR)
    if(!methods::is(listB[["metric"]]$FDR, "array") & !methods::is(listB[["metric"]]$FDR, "matrix")) listB[["metric"]]$FDR <- matrix(listB[["metric"]]$FDR)
    if(!methods::is(listA[["metric"]]$FDC, "array") & !methods::is(listA[["metric"]]$FDC, "matrix")) listA[["metric"]]$FDC <- matrix(listA[["metric"]]$FDC)
    if(!methods::is(listB[["metric"]]$FDC, "array") & !methods::is(listB[["metric"]]$FDC, "matrix")) listB[["metric"]]$FDC <- matrix(listB[["metric"]]$FDC)
    if(!methods::is(listA[["metric"]]$probTP, "array") & !methods::is(listA[["metric"]]$probTP, "matrix")) listA[["metric"]]$probTP <- matrix(listA[["metric"]]$probTP)
    if(!methods::is(listB[["metric"]]$probTP, "array") & !methods::is(listB[["metric"]]$probTP, "matrix")) listB[["metric"]]$probTP <- matrix(listB[["metric"]]$probTP)
    if(!methods::is(listA[["delta"]], "list")) listA[["delta"]] <- list(listA[["delta"]])
    returnList <- list()
    returnList[["power"]] <- abind::abind(listA[["power"]], listB[["power"]], along = 3)
    returnList[["delta"]] <- listA[["delta"]]
    returnList[["delta"]][[length(listA[["delta"]])+1]] <- listB[["delta"]]
    returnList[["metric"]]$marTypeI <- abind::abind(listA[["metric"]]$marTypeI, listB[["metric"]]$marTypeI, along = 3)
    returnList[["metric"]]$classicalPower <- abind::abind(listA[["metric"]]$classicalPower, listB[["metric"]]$classicalPower, along = 3)
    returnList[["metric"]]$FDR <- abind::abind(listA[["metric"]]$FDR, listB[["metric"]]$FDR, along = 3)
    returnList[["metric"]]$FDC <- abind::abind(listA[["metric"]]$FDC, listB[["metric"]]$FDC, along = 3)
    returnList[["metric"]]$probTP <- abind::abind(listA[["metric"]]$probTP, listB[["metric"]]$probTP, along = 3)
    return(returnList)
  }

  combine_totSampleSizes <- function(listA, listB){
    if(is.null(listA)) return(listB)
    returnList <- list()
    returnList[["power"]] <- cbind(listA[["power"]], listB[["power"]])
    returnList[["delta"]] <- cbind(listA[["delta"]], listB[["delta"]])
    returnList[["metric"]]$marTypeI <- cbind(listA[["metric"]]$marTypeI, listB[["metric"]]$marTypeI)
    returnList[["metric"]]$classicalPower <- cbind(listA[["metric"]]$classicalPower, listB[["metric"]]$classicalPower)
    returnList[["metric"]]$FDR <- cbind(listA[["metric"]]$FDR, listB[["metric"]]$FDR)
    returnList[["metric"]]$FDC <- cbind(listA[["metric"]]$FDC, listB[["metric"]]$FDC)
    returnList[["metric"]]$probTP <- cbind(listA[["metric"]]$probTP, listB[["metric"]]$probTP)
    return(returnList)
  }


  if(is.null(deltaSD)){
    # finding tau and K for target DM CpG's
    cat(paste("[",Sys.time(),"] ", "Finding tau...", sep = ""))
    K <- NULL
    tau <- NULL
    for(d in seq_along(targetDelta)){
      myTau <- getTau(targetDmCpGs, targetDelta[d], methPara, detectionLimit, J, CpGonArray)
      tau[d] <- myTau$tau
      K[d] <- myTau$K # number of changed CpGs
    }
    cat(paste("done", " [",Sys.time(),"]\n", sep = ""))
    print(paste("The following taus were chosen: ", paste(tau, collapse = ", "), sep = ""))
  } else {
    tau <- deltaSD
    K <- NULL
    for(d in seq_along(tau)){
      K[d] <- getK(targetDmCpGs, methPara, detectionLimit, J, CpGonArray, tau)
    }
  }


  # main function
  startTime <- Sys.time()
  cat(paste("[",startTime,"] ", "Running simulation\n", sep = ""))
  iterations <- length(totSampleSizes) * length(tau)
  pb <- utils::txtProgressBar(max = iterations, style = 3)
  progress <- function(n) utils::setTxtProgressBar(pb, n)
  opts <- list(progress = progress)
  Ntot <- NULL
  multiThreadOut <- foreach(d = seq_along(tau),
                            .combine = combine_tau,
                            .packages = c("truncnorm", "limma", "CpGassoc", "genefilter"),
                            .export = c("getAlphBet", "getMeanVar", "beta2Mvalue", "limma", "ttestSlow", "ttestFast", "Wilcox", "CPGassoc")) %:%
    foreach(Ntot = totSampleSizes, .combine = combine_totSampleSizes, .options.snow = opts) %dopar% {

      utils::setTxtProgressBar(pb, (d-1)*length(totSampleSizes) + which(Ntot==totSampleSizes))

      Ncnt <- round(Ntot * NcntPer)
      Ntx <- Ntot - Ncnt
      marPower <- NULL
      deltaSim <- NULL

      marTypeI <- NULL
      FDR <- NULL
      classicalPower <- NULL
      FDC <- NULL
      probTP <- NULL

      for(sim in seq_len(sims)){

        ## sample CpGs
        cpgIdx <- sample(x = seq_len(CpGonArray), size = J, replace = TRUE) # pick J random CpG's to be simulated
        cpgIdxName <- paste(seq_len(J), "_", rownames(methPara)[cpgIdx], sep = "") # ensuring unique CpG name (allowing unique sampling with replacement)
        changedCpgsIdx <- sample(x = cpgIdx, size = K[d]) # pick K random CpG's to be changed in mean meth
        changedCpgsIdxName <- cpgIdxName[match(changedCpgsIdx, cpgIdx)]

        ## Change Mu for "changedCpgsIdx"'s CpG's
        # drawing delta from truncated normal
        delta <- truncnorm::rtruncnorm(1, mean = 0, sd = as.numeric(tau[d]),
                                       a=0.5 - methPara$mu[changedCpgsIdx] - sqrt(0.25-methPara$var[changedCpgsIdx]),
                                       b=0.5 - methPara$mu[changedCpgsIdx] + sqrt(0.25-methPara$var[changedCpgsIdx]))
        deltaSim <- c(deltaSim, delta)  # multi core
        meaningfulDM <- (abs(delta) >= detectionLimit)
        meaningfulDMName <- changedCpgsIdxName[meaningfulDM]

        # changing mean methylation for specific CpG's (changedCpgsIdx)
        muToBeSimuUNchanged <- methPara$mu[cpgIdx]
        muToBeSimuChanged   <- methPara$mu[cpgIdx]
        muToBeSimuChanged[match(changedCpgsIdx, cpgIdx)] <- muToBeSimuChanged[match(changedCpgsIdx, cpgIdx)] + delta

        ## get alpha and beta
        params_unchanged <- getAlphBet(myMean = muToBeSimuUNchanged, myVar = methPara$var[cpgIdx])
        alpha_unchanged <- params_unchanged$alpha
        beta_unchanged <- params_unchanged$beta
        params_changed <- getAlphBet(myMean = muToBeSimuChanged, myVar = methPara$var[cpgIdx])
        alpha_changed <- params_changed$alpha
        beta_changed <- params_changed$beta

        ## simulate baseline beta values
        g1Beta <- NULL
        g2Beta <- NULL
        g1Beta <- matrix(stats::rbeta(J*Ncnt, rep(alpha_unchanged, each = Ncnt), rep(beta_unchanged, each = Ncnt)), ncol = Ncnt, byrow = TRUE)
        g2Beta <- matrix(stats::rbeta(J*Ntx, rep(alpha_changed, each = Ntx), rep(beta_changed, each = Ntx)), ncol = Ntx, byrow = TRUE)
        g1Beta[g1Beta == 1] <- max(g1Beta[g1Beta != 1]) # replacing 0/1 by min/max
        g2Beta[g2Beta == 1] <- max(g2Beta[g2Beta != 1])
        g1Beta[g1Beta == 0] <- min(g1Beta[g1Beta != 0])
        g2Beta[g2Beta == 0] <- min(g2Beta[g2Beta != 0])
        rownames(g1Beta) <- rownames(g2Beta) <- paste(seq_len(J),"_",names(alpha_unchanged),sep = "")

        # Tests
        DMtest <- switch(DMmethod,
                         "t-test (unequal var)" = ttestSlow(g1Beta,g2Beta,Ncnt,Ntx,paired=FALSE), # t-test slow (unequal var)
                         "t-test (equal var)" = ttestFast(g1Beta,g2Beta,Ncnt,Ntx), # t-test (equal var)
                         "CPGassoc" = CPGassoc(g1Beta, g2Beta, Ncnt,Ntx), # CPG assoc
                         "Wilcox rank sum" = Wilcox(g1Beta, g2Beta, Ncnt,Ntx), # Wilcox rank sum test
                         "limma" = limma(g1Beta, g2Beta, Ncnt,Ntx), # limma
                         stop("Test not found")
        )

        # table from paper
        notDM  <- cpgIdxName[!(cpgIdxName %in% changedCpgsIdxName)]
        DM_negligible <- changedCpgsIdxName[!(changedCpgsIdxName %in% meaningfulDMName)]
        DM_meaningful <- changedCpgsIdxName[changedCpgsIdxName %in% meaningfulDMName]

        FP  <- intersect(cpgIdxName[DMtest$fdr<FDRcritVal], notDM) # FP
        NP <- intersect(cpgIdxName[DMtest$fdr<FDRcritVal], DM_negligible) # NP
        TP <- intersect(cpgIdxName[DMtest$fdr<FDRcritVal], DM_meaningful) # TP
        detectedCpGs <- cpgIdxName[DMtest$fdr<FDRcritVal]

        TN <- intersect(cpgIdxName[!(DMtest$fdr<FDRcritVal)], notDM) # TN
        NN <- intersect(cpgIdxName[!(DMtest$fdr<FDRcritVal)], DM_negligible) # NN
        FN <- intersect(cpgIdxName[!(DMtest$fdr<FDRcritVal)], DM_meaningful) # FN


        ## metrics
        marPower[sim] <- ifelse(length(DM_meaningful) > 0, length(TP)/length(DM_meaningful), NA)
        marTypeI[sim] <- ifelse(length(notDM) > 0, length(FP)/length(notDM), NA)
        FDR[sim] <- ifelse(length(detectedCpGs) > 0, length(FP)/length(detectedCpGs), NA)
        FDC[sim] <- ifelse(length(TP) > 0, (length(FP))/length(TP), NA) # expected false discovery for each true discovery
        classicalPower[sim] <- (length(NP)+length(TP))/(length(DM_negligible)+length(DM_meaningful))
        probTP[sim] <- ifelse(length(TP)>0, 1, 0)

      } # end sim

      outSim <- list()
      outSim[["power"]] <- marPower # multi core
      outSim[["delta"]] <- deltaSim # multi core
      outSim[["metric"]]$marTypeI <- marTypeI
      outSim[["metric"]]$FDR <- FDR
      outSim[["metric"]]$classicalPower <- classicalPower
      outSim[["metric"]]$FDC <- FDC
      outSim[["metric"]]$probTP <- probTP
      outSim
    } # end tau and totSampleSizes
  close(pb)
  parallel::stopCluster(cl)
  cat(paste("[",startTime,"] Running simulation ... done [",Sys.time(),"]\n", sep = ""))

  # dirty fix
  if(is.null(targetDelta)) targetDelta <- tau

  # calculate mean marginal power and adding names
  if(length(targetDelta) == 1 & length(totSampleSizes) == 1)  output$meanPower <- matrix(mean(multiThreadOut[["power"]], na.rm=TRUE))
  if(length(targetDelta) == 1 & length(totSampleSizes) > 1)   output$meanPower <- matrix(apply(multiThreadOut[["power"]], 2, mean, na.rm=TRUE), ncol = 1)
  if(length(targetDelta) > 1)                                 output$meanPower <- apply(multiThreadOut[["power"]], c(2,3), mean, na.rm=TRUE)
  rownames(output$meanPower) <- totSampleSizes
  colnames(output$meanPower) <- targetDelta

  # powerArray
  if(length(targetDelta) == 1) output$powerArray <- array(data = multiThreadOut[["power"]], dim = c(sims, length(totSampleSizes), length(targetDelta)))
  if(length(targetDelta) > 1 & length(totSampleSizes) == 1) output$powerArray <- multiThreadOut[["power"]]
  if(length(targetDelta) > 1) output$powerArray <- multiThreadOut[["power"]]
  dimnames(output$powerArray) <- list(seq_len(sims), totSampleSizes, targetDelta)

  # deltaArray
  if(length(targetDelta) == 1 & length(totSampleSizes) == 1)  output$deltaArray <- list(matrix(multiThreadOut[["delta"]]))
  if(length(targetDelta) == 1 & length(totSampleSizes) > 1)   output$deltaArray <- list(multiThreadOut[["delta"]])
  if(length(targetDelta) > 1 & length(totSampleSizes) == 1)   output$deltaArray <- lapply(multiThreadOut[["delta"]],as.matrix)
  if(length(targetDelta) > 1 & length(totSampleSizes) > 1)    output$deltaArray <- multiThreadOut[["delta"]]
  names(output$deltaArray) <- targetDelta
  for(d in seq_along(targetDelta)){
    colnames(output$deltaArray[[d]]) <- totSampleSizes
  }


  # calculate mean marTypeI and adding names
  if(length(targetDelta) == 1 & length(totSampleSizes) == 1)  output$metric$marTypeI <- matrix(mean(multiThreadOut[["metric"]]$marTypeI, na.rm=TRUE))
  if(length(targetDelta) == 1 & length(totSampleSizes) > 1)   output$metric$marTypeI <- matrix(apply(multiThreadOut[["metric"]]$marTypeI, 2, mean, na.rm=TRUE), ncol = 1)
  if(length(targetDelta) > 1)                                 output$metric$marTypeI <- apply(multiThreadOut[["metric"]]$marTypeI, c(2,3), mean, na.rm=TRUE)
  rownames(output$metric$marTypeI) <- totSampleSizes
  colnames(output$metric$marTypeI) <- targetDelta

  # calculate mean classicalPower and adding names
  if(length(targetDelta) == 1 & length(totSampleSizes) == 1)  output$metric$classicalPower <- matrix(mean(multiThreadOut[["metric"]]$classicalPower, na.rm=TRUE))
  if(length(targetDelta) == 1 & length(totSampleSizes) > 1)   output$metric$classicalPower <- matrix(apply(multiThreadOut[["metric"]]$classicalPower, 2, mean, na.rm=TRUE), ncol = 1)
  if(length(targetDelta) > 1)                                 output$metric$classicalPower <- apply(multiThreadOut[["metric"]]$classicalPower, c(2,3), mean, na.rm=TRUE)
  rownames(output$metric$classicalPower) <- totSampleSizes
  colnames(output$metric$classicalPower) <- targetDelta

  # calculate mean FDR and adding names
  if(length(targetDelta) == 1 & length(totSampleSizes) == 1)  output$metric$FDR <- matrix(mean(multiThreadOut[["metric"]]$FDR, na.rm=TRUE))
  if(length(targetDelta) == 1 & length(totSampleSizes) > 1)   output$metric$FDR <- matrix(apply(multiThreadOut[["metric"]]$FDR, 2, mean, na.rm=TRUE), ncol = 1)
  if(length(targetDelta) > 1)                                 output$metric$FDR <- apply(multiThreadOut[["metric"]]$FDR, c(2,3), mean, na.rm=TRUE)
  rownames(output$metric$FDR) <- totSampleSizes
  colnames(output$metric$FDR) <- targetDelta

  # calculate mean FDC and adding names
  if(length(targetDelta) == 1 & length(totSampleSizes) == 1)  output$metric$FDC <- matrix(mean(multiThreadOut[["metric"]]$FDC, na.rm=TRUE))
  if(length(targetDelta) == 1 & length(totSampleSizes) > 1)   output$metric$FDC <- matrix(apply(multiThreadOut[["metric"]]$FDC, 2, mean, na.rm=TRUE), ncol = 1)
  if(length(targetDelta) > 1)                                 output$metric$FDC <- apply(multiThreadOut[["metric"]]$FDC, c(2,3), mean, na.rm=TRUE)
  rownames(output$metric$FDC) <- totSampleSizes
  colnames(output$metric$FDC) <- targetDelta

  # calculate probability of detecting at least one TP and adding names
  if(length(targetDelta) == 1 & length(totSampleSizes) == 1)  output$metric$probTP <- matrix(mean(multiThreadOut[["metric"]]$probTP, na.rm=TRUE))
  if(length(targetDelta) == 1 & length(totSampleSizes) > 1)   output$metric$probTP <- matrix(apply(multiThreadOut[["metric"]]$probTP, 2, mean, na.rm=TRUE), ncol = 1)
  if(length(targetDelta) > 1)                                 output$metric$probTP <- apply(multiThreadOut[["metric"]]$probTP, c(2,3), mean, na.rm=TRUE)
  rownames(output$metric$probTP) <- totSampleSizes
  colnames(output$metric$probTP) <- targetDelta

  return(output)
}
	#!/usr/bin/env R

	# Author: Sean Maden
	#
	# * Description *
	# This is a sligthly modified form of the pwrEWAS power analysis function from the pwrEWAS R package.
	#
	# Source this function from an active R session using:
	# > devtools::source_gist("https://gist.github.com/metamaden/953b128c230e32ef1beecbc2695e93c1")
	#
	# * References *
	#
	# 1. This code was used in the manuscript:
	#
	# Maden SK* , Walsh B, Ellrott K, Hansen KD, Thompson RF, Nellore A. “Recountmethylation enables flexible
	# analysis of public blood DNA methylation array data.” Bioinformatics Advances 3, 1. 2023.
	#
	# 2. Original source code for pwrEWAS() function:
	# https://github.com/stefangraw/pwrEWAS/blob/master/R/pwrEWAS_v1.8.R
	#
	#
	#

	#' @title pwrEWAS_itable - A computationally efficient tool for comprehensive power estimation in EWAS
	#'
	#' @description pwrEWAS is a computationally efficient tool to estimate power in EWAS as a function of sample and effect size for two-group comparisons of DNAm (e.g., case vs control, exposed vs non-exposed, etc.). Detailed description of in-/outputs, instructions and an example, as well as interpretations of the example results are provided in the vignette: vignette("pwrEWAS")
	#'
	#' @param tissueType Tissue type DNA methylation data, containing columns "mu" for Beta-value means, and "var" for Beta-value variances.
	#' @param minTotSampleSize Minimum total sample size.
	#' @param maxTotSampleSize Maximum total sample size.
	#' @param SampleSizeSteps Sample size increments.
	#' @param NcntPer Percentage sample group 1 (control group) (NcntPer = 0.5 indicates a balanced design).
	#' @param targetDelta Target maximum difference in mean DNAm. (Either 'targetDelta' or 'deltaSD' should be specified)
	#' @param deltaSD Standard deviation of simulated differences. (Either 'targetDelta' or 'deltaSD' should be specified)
	#' @param J Number of CpGs tested/simulated (default: 100000).
	#' @param targetDmCpGs Target number of DM CpGs.
	#' @param detectionLimit Smallest detectable difference in DNAm (default: 0.01).
	#' @param DMmethod Method of Differential Methylation analysis: "limma" (default), "t-test (unequal var)", "t-test (equal var)", "Wilcox rank sum", "CPGassoc".
	#' @param FDRcritVal FDRcritVal (default: 0.05).
	#' @param core Number of threads for multi-threading (default: 1).
	#' @param sims Number of simulated data sets (default: 50).
	#'
	#' @return pwrEWAS will return an object with the following four attributes: meanPower, powerArray, deltaArray, and metric, where metric contains marTypeI, classicalPower, FDR, and FDC
	#'
	#' @keywords DNAm microarray power
	#'
	#' @export
	#'
	#' @examples
	#' library(minfiData)
	#' data("MsetEx") # load MethylSet
	#' ms <- MsetEx
	#' bval <- getBeta(ms) # get DNAm fractions
	#' # get the summary data frame
	#' dfpwr <- data.frame(mu = rowMeans(bval, na.rm = T),
	#' var = rowVars(bval, na.rm = T))
	#'
	#' # get power analysis results
	#' outDelta <- pwrEWAS_itable(
	#' tissueType = dfpwr,
	#' minTotSampleSize = 10,
	#' maxTotSampleSize = 20,
	#' SampleSizeSteps = 10,
	#' NcntPer = 0.5,
	#' targetDelta = c(0.2, 0.5),
	#' J = 1000,
	#' targetDmCpGs = 10,
	#' detectionLimit = 0.01,
	#' DMmethod = "limma",
	#' FDRcritVal = 0.05,
	#' core = 2,
	#' sims = 30)
	#'
	#' outSD <- pwrEWAS(
	#' tissueType = dfpwr,
	#' minTotSampleSize = 10,
	#' maxTotSampleSize = 20,
	#' SampleSizeSteps = 10,
	#' NcntPer = 0.5,
	#' deltaSD = c(0.02, 0.03),
	#' J = 1000,
	#' targetDmCpGs = 10,
	#' detectionLimit = 0.01,
	#' DMmethod = "limma",
	#' FDRcritVal = 0.05,
	#' core = 2,
	#' sims = 30)
	pwrEWAS_itable <- function(
	tissueType, # dna methylation data means and variances
	minTotSampleSize, # min total sample size
	maxTotSampleSize, # max total sample size
	SampleSizeSteps, # steps for increasing total sample size
	NcntPer, # percentage of control sample
	targetDelta = NULL, # vector of 99 percentile of the target max DM
	deltaSD = NULL, # vector of sd(delta)
	J = 100000, # number of simulated CpGs
	targetDmCpGs, # target number for truely differentially methylated CpG
	detectionLimit = 0.01, # lower bound for differential methylated CpG's to be considered TRULY differential methylated
	DMmethod = c("limma", "t-test (unequal var)", "t-test (equal var)", "Wilcox rank sum", "CPGassoc"), # method to detect differential methylation
	FDRcritVal = 0.05,
	core = 1, # number of cores to multi thread
	sims = 50
	){
	methPara <- tissueType
	DMmethod <- match.arg(DMmethod)

	if(!is.null(targetDelta) & !is.null(deltaSD)) stop("Please specify only one: 'targetDelta' or 'deltaSD'")

	#load data
	#methPara <- pwrEWAS.data:::loadDataset(tissueType)
	# methPara <- ExperimentHub::ExperimentHub()[["EH3078"]]
	CpGonArray <- length(methPara$mu)

	output <- NULL
	totSampleSizes <- seq(minTotSampleSize, maxTotSampleSize, SampleSizeSteps)

	# initalize multi thread clusters
	cl <- parallel::makeCluster(core) # multi core
	doSNOW::registerDoSNOW(cl)
	# doParallel::registerDoParallel(cl) # multi core

	# combining function for foreach loops
	combine_tau <- function(listA, listB){
	if(is.null(listA)) return(listB)
	if(!methods::is(listA[["power"]], "array") & !methods::is(listA[["power"]], "matrix")) listA[["power"]] <- matrix(listA[["power"]])
	if(!methods::is(listB[["power"]], "array") & !methods::is(listB[["power"]], "matrix")) listB[["power"]] <- matrix(listB[["power"]])
	if(!methods::is(listA[["metric"]]$marTypeI, "array") & !methods::is(listA[["metric"]]$marTypeI, "matrix")) listA[["metric"]]$marTypeI <- matrix(listA[["metric"]]$marTypeI)
	if(!methods::is(listB[["metric"]]$marTypeI, "array") & !methods::is(listB[["metric"]]$marTypeI, "matrix")) listB[["metric"]]$marTypeI <- matrix(listB[["metric"]]$marTypeI)
	if(!methods::is(listA[["metric"]]$classicalPower, "array") & !methods::is(listA[["metric"]]$classicalPower, "matrix")) listA[["metric"]]$classicalPower <- matrix(listA[["metric"]]$classicalPower)
	if(!methods::is(listB[["metric"]]$classicalPower, "array") & !methods::is(listB[["metric"]]$classicalPower, "matrix")) listB[["metric"]]$classicalPower <- matrix(listB[["metric"]]$classicalPower)
	if(!methods::is(listA[["metric"]]$FDR, "array") & !methods::is(listA[["metric"]]$FDR, "matrix")) listA[["metric"]]$FDR <- matrix(listA[["metric"]]$FDR)
	if(!methods::is(listB[["metric"]]$FDR, "array") & !methods::is(listB[["metric"]]$FDR, "matrix")) listB[["metric"]]$FDR <- matrix(listB[["metric"]]$FDR)
	if(!methods::is(listA[["metric"]]$FDC, "array") & !methods::is(listA[["metric"]]$FDC, "matrix")) listA[["metric"]]$FDC <- matrix(listA[["metric"]]$FDC)
	if(!methods::is(listB[["metric"]]$FDC, "array") & !methods::is(listB[["metric"]]$FDC, "matrix")) listB[["metric"]]$FDC <- matrix(listB[["metric"]]$FDC)
	if(!methods::is(listA[["metric"]]$probTP, "array") & !methods::is(listA[["metric"]]$probTP, "matrix")) listA[["metric"]]$probTP <- matrix(listA[["metric"]]$probTP)
	if(!methods::is(listB[["metric"]]$probTP, "array") & !methods::is(listB[["metric"]]$probTP, "matrix")) listB[["metric"]]$probTP <- matrix(listB[["metric"]]$probTP)
	if(!methods::is(listA[["delta"]], "list")) listA[["delta"]] <- list(listA[["delta"]])
	returnList <- list()
	returnList[["power"]] <- abind::abind(listA[["power"]], listB[["power"]], along = 3)
	returnList[["delta"]] <- listA[["delta"]]
	returnList[["delta"]][[length(listA[["delta"]])+1]] <- listB[["delta"]]
	returnList[["metric"]]$marTypeI <- abind::abind(listA[["metric"]]$marTypeI, listB[["metric"]]$marTypeI, along = 3)
	returnList[["metric"]]$classicalPower <- abind::abind(listA[["metric"]]$classicalPower, listB[["metric"]]$classicalPower, along = 3)
	returnList[["metric"]]$FDR <- abind::abind(listA[["metric"]]$FDR, listB[["metric"]]$FDR, along = 3)
	returnList[["metric"]]$FDC <- abind::abind(listA[["metric"]]$FDC, listB[["metric"]]$FDC, along = 3)
	returnList[["metric"]]$probTP <- abind::abind(listA[["metric"]]$probTP, listB[["metric"]]$probTP, along = 3)
	return(returnList)
	}

	combine_totSampleSizes <- function(listA, listB){
	if(is.null(listA)) return(listB)
	returnList <- list()
	returnList[["power"]] <- cbind(listA[["power"]], listB[["power"]])
	returnList[["delta"]] <- cbind(listA[["delta"]], listB[["delta"]])
	returnList[["metric"]]$marTypeI <- cbind(listA[["metric"]]$marTypeI, listB[["metric"]]$marTypeI)
	returnList[["metric"]]$classicalPower <- cbind(listA[["metric"]]$classicalPower, listB[["metric"]]$classicalPower)
	returnList[["metric"]]$FDR <- cbind(listA[["metric"]]$FDR, listB[["metric"]]$FDR)
	returnList[["metric"]]$FDC <- cbind(listA[["metric"]]$FDC, listB[["metric"]]$FDC)
	returnList[["metric"]]$probTP <- cbind(listA[["metric"]]$probTP, listB[["metric"]]$probTP)
	return(returnList)
	}


	if(is.null(deltaSD)){
	# finding tau and K for target DM CpG's
	cat(paste("[",Sys.time(),"] ", "Finding tau...", sep = ""))
	K <- NULL
	tau <- NULL
	for(d in seq_along(targetDelta)){
	myTau <- getTau(targetDmCpGs, targetDelta[d], methPara, detectionLimit, J, CpGonArray)
	tau[d] <- myTau$tau
	K[d] <- myTau$K # number of changed CpGs
	}
	cat(paste("done", " [",Sys.time(),"]\n", sep = ""))
	print(paste("The following taus were chosen: ", paste(tau, collapse = ", "), sep = ""))
	} else {
	tau <- deltaSD
	K <- NULL
	for(d in seq_along(tau)){
	K[d] <- getK(targetDmCpGs, methPara, detectionLimit, J, CpGonArray, tau)
	}
	}



	# main function
	startTime <- Sys.time()
	cat(paste("[",startTime,"] ", "Running simulation\n", sep = ""))
	iterations <- length(totSampleSizes) * length(tau)
	pb <- utils::txtProgressBar(max = iterations, style = 3)
	progress <- function(n) utils::setTxtProgressBar(pb, n)
	opts <- list(progress = progress)
	Ntot <- NULL
	multiThreadOut <- foreach(d = seq_along(tau),
	.combine = combine_tau,
	.packages = c("truncnorm", "limma", "CpGassoc", "genefilter"),
	.export = c("getAlphBet", "getMeanVar", "beta2Mvalue", "limma", "ttestSlow", "ttestFast", "Wilcox", "CPGassoc")) %:%
	foreach(Ntot = totSampleSizes, .combine = combine_totSampleSizes, .options.snow = opts) %dopar% {

	utils::setTxtProgressBar(pb, (d-1)*length(totSampleSizes) + which(Ntot==totSampleSizes))

	Ncnt <- round(Ntot * NcntPer)
	Ntx <- Ntot - Ncnt
	marPower <- NULL
	deltaSim <- NULL

	marTypeI <- NULL
	FDR <- NULL
	classicalPower <- NULL
	FDC <- NULL
	probTP <- NULL

	for(sim in seq_len(sims)){

	## sample CpGs
	cpgIdx <- sample(x = seq_len(CpGonArray), size = J, replace = TRUE) # pick J random CpG's to be simulated
	cpgIdxName <- paste(seq_len(J), "_", rownames(methPara)[cpgIdx], sep = "") # ensuring unique CpG name (allowing unique sampling with replacement)
	changedCpgsIdx <- sample(x = cpgIdx, size = K[d]) # pick K random CpG's to be changed in mean meth
	changedCpgsIdxName <- cpgIdxName[match(changedCpgsIdx, cpgIdx)]

	## Change Mu for "changedCpgsIdx"'s CpG's
	# drawing delta from truncated normal
	delta <- truncnorm::rtruncnorm(1, mean = 0, sd = as.numeric(tau[d]),
	a=0.5 - methPara$mu[changedCpgsIdx] - sqrt(0.25-methPara$var[changedCpgsIdx]),
	b=0.5 - methPara$mu[changedCpgsIdx] + sqrt(0.25-methPara$var[changedCpgsIdx]))
	deltaSim <- c(deltaSim, delta) # multi core
	meaningfulDM <- (abs(delta) >= detectionLimit)
	meaningfulDMName <- changedCpgsIdxName[meaningfulDM]

	# changing mean methylation for specific CpG's (changedCpgsIdx)
	muToBeSimuUNchanged <- methPara$mu[cpgIdx]
	muToBeSimuChanged <- methPara$mu[cpgIdx]
	muToBeSimuChanged[match(changedCpgsIdx, cpgIdx)] <- muToBeSimuChanged[match(changedCpgsIdx, cpgIdx)] + delta

	## get alpha and beta
	params_unchanged <- getAlphBet(myMean = muToBeSimuUNchanged, myVar = methPara$var[cpgIdx])
	alpha_unchanged <- params_unchanged$alpha
	beta_unchanged <- params_unchanged$beta
	params_changed <- getAlphBet(myMean = muToBeSimuChanged, myVar = methPara$var[cpgIdx])
	alpha_changed <- params_changed$alpha
	beta_changed <- params_changed$beta

	## simulate baseline beta values
	g1Beta <- NULL
	g2Beta <- NULL
	g1Beta <- matrix(stats::rbeta(J*Ncnt, rep(alpha_unchanged, each = Ncnt), rep(beta_unchanged, each = Ncnt)), ncol = Ncnt, byrow = TRUE)
	g2Beta <- matrix(stats::rbeta(J*Ntx, rep(alpha_changed, each = Ntx), rep(beta_changed, each = Ntx)), ncol = Ntx, byrow = TRUE)
	g1Beta[g1Beta == 1] <- max(g1Beta[g1Beta != 1]) # replacing 0/1 by min/max
	g2Beta[g2Beta == 1] <- max(g2Beta[g2Beta != 1])
	g1Beta[g1Beta == 0] <- min(g1Beta[g1Beta != 0])
	g2Beta[g2Beta == 0] <- min(g2Beta[g2Beta != 0])
	rownames(g1Beta) <- rownames(g2Beta) <- paste(seq_len(J),"_",names(alpha_unchanged),sep = "")

	# Tests
	DMtest <- switch(DMmethod,
	"t-test (unequal var)" = ttestSlow(g1Beta,g2Beta,Ncnt,Ntx,paired=FALSE), # t-test slow (unequal var)
	"t-test (equal var)" = ttestFast(g1Beta,g2Beta,Ncnt,Ntx), # t-test (equal var)
	"CPGassoc" = CPGassoc(g1Beta, g2Beta, Ncnt,Ntx), # CPG assoc
	"Wilcox rank sum" = Wilcox(g1Beta, g2Beta, Ncnt,Ntx), # Wilcox rank sum test
	"limma" = limma(g1Beta, g2Beta, Ncnt,Ntx), # limma
	stop("Test not found")
	)

	# table from paper
	notDM <- cpgIdxName[!(cpgIdxName %in% changedCpgsIdxName)]
	DM_negligible <- changedCpgsIdxName[!(changedCpgsIdxName %in% meaningfulDMName)]
	DM_meaningful <- changedCpgsIdxName[changedCpgsIdxName %in% meaningfulDMName]

	FP <- intersect(cpgIdxName[DMtest$fdr<FDRcritVal], notDM) # FP
	NP <- intersect(cpgIdxName[DMtest$fdr<FDRcritVal], DM_negligible) # NP
	TP <- intersect(cpgIdxName[DMtest$fdr<FDRcritVal], DM_meaningful) # TP
	detectedCpGs <- cpgIdxName[DMtest$fdr<FDRcritVal]

	TN <- intersect(cpgIdxName[!(DMtest$fdr<FDRcritVal)], notDM) # TN
	NN <- intersect(cpgIdxName[!(DMtest$fdr<FDRcritVal)], DM_negligible) # NN
	FN <- intersect(cpgIdxName[!(DMtest$fdr<FDRcritVal)], DM_meaningful) # FN


	## metrics
	marPower[sim] <- ifelse(length(DM_meaningful) > 0, length(TP)/length(DM_meaningful), NA)
	marTypeI[sim] <- ifelse(length(notDM) > 0, length(FP)/length(notDM), NA)
	FDR[sim] <- ifelse(length(detectedCpGs) > 0, length(FP)/length(detectedCpGs), NA)
	FDC[sim] <- ifelse(length(TP) > 0, (length(FP))/length(TP), NA) # expected false discovery for each true discovery
	classicalPower[sim] <- (length(NP)+length(TP))/(length(DM_negligible)+length(DM_meaningful))
	probTP[sim] <- ifelse(length(TP)>0, 1, 0)

	} # end sim

	outSim <- list()
	outSim[["power"]] <- marPower # multi core
	outSim[["delta"]] <- deltaSim # multi core
	outSim[["metric"]]$marTypeI <- marTypeI
	outSim[["metric"]]$FDR <- FDR
	outSim[["metric"]]$classicalPower <- classicalPower
	outSim[["metric"]]$FDC <- FDC
	outSim[["metric"]]$probTP <- probTP
	outSim
	} # end tau and totSampleSizes
	close(pb)
	parallel::stopCluster(cl)
	cat(paste("[",startTime,"] Running simulation ... done [",Sys.time(),"]\n", sep = ""))

	# dirty fix
	if(is.null(targetDelta)) targetDelta <- tau

	# calculate mean marginal power and adding names
	if(length(targetDelta) == 1 & length(totSampleSizes) == 1) output$meanPower <- matrix(mean(multiThreadOut[["power"]], na.rm=TRUE))
	if(length(targetDelta) == 1 & length(totSampleSizes) > 1) output$meanPower <- matrix(apply(multiThreadOut[["power"]], 2, mean, na.rm=TRUE), ncol = 1)
	if(length(targetDelta) > 1) output$meanPower <- apply(multiThreadOut[["power"]], c(2,3), mean, na.rm=TRUE)
	rownames(output$meanPower) <- totSampleSizes
	colnames(output$meanPower) <- targetDelta

	# powerArray
	if(length(targetDelta) == 1) output$powerArray <- array(data = multiThreadOut[["power"]], dim = c(sims, length(totSampleSizes), length(targetDelta)))
	if(length(targetDelta) > 1 & length(totSampleSizes) == 1) output$powerArray <- multiThreadOut[["power"]]
	if(length(targetDelta) > 1) output$powerArray <- multiThreadOut[["power"]]
	dimnames(output$powerArray) <- list(seq_len(sims), totSampleSizes, targetDelta)

	# deltaArray
	if(length(targetDelta) == 1 & length(totSampleSizes) == 1) output$deltaArray <- list(matrix(multiThreadOut[["delta"]]))
	if(length(targetDelta) == 1 & length(totSampleSizes) > 1) output$deltaArray <- list(multiThreadOut[["delta"]])
	if(length(targetDelta) > 1 & length(totSampleSizes) == 1) output$deltaArray <- lapply(multiThreadOut[["delta"]],as.matrix)
	if(length(targetDelta) > 1 & length(totSampleSizes) > 1) output$deltaArray <- multiThreadOut[["delta"]]
	names(output$deltaArray) <- targetDelta
	for(d in seq_along(targetDelta)){
	colnames(output$deltaArray[[d]]) <- totSampleSizes
	}


	# calculate mean marTypeI and adding names
	if(length(targetDelta) == 1 & length(totSampleSizes) == 1) output$metric$marTypeI <- matrix(mean(multiThreadOut[["metric"]]$marTypeI, na.rm=TRUE))
	if(length(targetDelta) == 1 & length(totSampleSizes) > 1) output$metric$marTypeI <- matrix(apply(multiThreadOut[["metric"]]$marTypeI, 2, mean, na.rm=TRUE), ncol = 1)
	if(length(targetDelta) > 1) output$metric$marTypeI <- apply(multiThreadOut[["metric"]]$marTypeI, c(2,3), mean, na.rm=TRUE)
	rownames(output$metric$marTypeI) <- totSampleSizes
	colnames(output$metric$marTypeI) <- targetDelta

	# calculate mean classicalPower and adding names
	if(length(targetDelta) == 1 & length(totSampleSizes) == 1) output$metric$classicalPower <- matrix(mean(multiThreadOut[["metric"]]$classicalPower, na.rm=TRUE))
	if(length(targetDelta) == 1 & length(totSampleSizes) > 1) output$metric$classicalPower <- matrix(apply(multiThreadOut[["metric"]]$classicalPower, 2, mean, na.rm=TRUE), ncol = 1)
	if(length(targetDelta) > 1) output$metric$classicalPower <- apply(multiThreadOut[["metric"]]$classicalPower, c(2,3), mean, na.rm=TRUE)
	rownames(output$metric$classicalPower) <- totSampleSizes
	colnames(output$metric$classicalPower) <- targetDelta

	# calculate mean FDR and adding names
	if(length(targetDelta) == 1 & length(totSampleSizes) == 1) output$metric$FDR <- matrix(mean(multiThreadOut[["metric"]]$FDR, na.rm=TRUE))
	if(length(targetDelta) == 1 & length(totSampleSizes) > 1) output$metric$FDR <- matrix(apply(multiThreadOut[["metric"]]$FDR, 2, mean, na.rm=TRUE), ncol = 1)
	if(length(targetDelta) > 1) output$metric$FDR <- apply(multiThreadOut[["metric"]]$FDR, c(2,3), mean, na.rm=TRUE)
	rownames(output$metric$FDR) <- totSampleSizes
	colnames(output$metric$FDR) <- targetDelta

	# calculate mean FDC and adding names
	if(length(targetDelta) == 1 & length(totSampleSizes) == 1) output$metric$FDC <- matrix(mean(multiThreadOut[["metric"]]$FDC, na.rm=TRUE))
	if(length(targetDelta) == 1 & length(totSampleSizes) > 1) output$metric$FDC <- matrix(apply(multiThreadOut[["metric"]]$FDC, 2, mean, na.rm=TRUE), ncol = 1)
	if(length(targetDelta) > 1) output$metric$FDC <- apply(multiThreadOut[["metric"]]$FDC, c(2,3), mean, na.rm=TRUE)
	rownames(output$metric$FDC) <- totSampleSizes
	colnames(output$metric$FDC) <- targetDelta

	# calculate probability of detecting at least one TP and adding names
	if(length(targetDelta) == 1 & length(totSampleSizes) == 1) output$metric$probTP <- matrix(mean(multiThreadOut[["metric"]]$probTP, na.rm=TRUE))
	if(length(targetDelta) == 1 & length(totSampleSizes) > 1) output$metric$probTP <- matrix(apply(multiThreadOut[["metric"]]$probTP, 2, mean, na.rm=TRUE), ncol = 1)
	if(length(targetDelta) > 1) output$metric$probTP <- apply(multiThreadOut[["metric"]]$probTP, c(2,3), mean, na.rm=TRUE)
	rownames(output$metric$probTP) <- totSampleSizes
	colnames(output$metric$probTP) <- targetDelta

	return(output)
	}