Last active
January 21, 2021 23:49
-
-
Save hakyim/7b46c759faa00f93df16 to your computer and use it in GitHub Desktop.
eFDR: compute empirical FDR based on observed vector of p values and empirical null p values computed using simulated phenotype
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## May 7, 2012 | |
## Hae Kyung Im | |
## haky@uchicago.edu | |
## | |
## Please cite: | |
## Eric R Gamazon, R. Stephanie Huang, Eileen Dolan, Nancy Cox, and Hae Kyung Im, (2012) | |
## Integrative Genomics: Quantifying significance of phenotype-genotype | |
## relationships from multiple sources of high-throughput data | |
## Frontiers of Genetics - under review | |
## eFDR | |
## compute empirical FDR based on observed vector of p values | |
## and null p values computed using simulated phenotype | |
## usage | |
## eFDR(pobs,pnulllist,nmax=20) | |
## arguments: | |
## pobs: vector of observed p values | |
## names(pobs) should be the names of the SNPs | |
## if no names, they will be labelled SNP1...SNPn | |
## pnulllist: list with simulated null pvalues | |
## nmax is the maximum number of top snps for which FDR will be computed | |
## the larger this number the slower the computation will be | |
## value: will return pvalue | |
library(qvalue) | |
eFDR = function(pobs,plist,nmax=100,qval=T) | |
{ | |
if(is.matrix(plist)) pnullmat = plist else | |
if(is.data.frame(plist)) pnullmat = as.matrix(plist) else | |
if(is.list(plist)) pnullmat = plist2pmat(pobs,plist) | |
if(is.null(names(pobs))) | |
{ | |
warning("Observed p vector, pobs, is not labelled - labelling SNP1..n") | |
names(pobs) = "SNP" %&% (1:length(pobs)) | |
} | |
fdrdata = eFDR.mat(pobs,pnullmat,nmax,qval) | |
return(fdrdata) | |
} | |
## turns list of pnull to matrix pnullmat | |
plist2pmat = function(pobs,pnulllist) | |
{ | |
maxnrow = max(unlist(lapply(pnulllist,length))) | |
maxnrow = max(maxnrow,pobs) | |
nsim = length(pnulllist) | |
pnullmat = matrix(NA,maxnrow,nsim) | |
for(cc in 1:nsim) | |
{ | |
pvec = pveclist[[cc]] | |
pnullmat[1:length(pvec),cc] = pvec | |
} | |
return(pnullmat) | |
} | |
## | |
eFDR.mat = function(pobs,pnullmat,nmax,qval) | |
{ | |
npobs = sum(!is.na(pobs)) | |
nnull = sum(!is.na(pnullmat)) | |
lambda = median(pobs,na.rm=T) ## lambda is used to estimate pi0 | |
pi0 = ( sum(pobs>lambda,na.rm=T) / npobs ) / | |
( sum(pnullmat>lambda,na.rm=T) / nnull ) | |
pi0 = min(pi0,1) | |
print(pi0) | |
fdrvec = rep(NA,nmax) | |
for(rr in 1:nmax) | |
{ | |
gamma = pobs[rr] | |
npless.null = sum(pnullmat<=gamma,na.rm=T) | |
npless.obs = sum(pobs<=gamma,na.rm=T) | |
fdr.cons = (npless.null/nnull) / (npless.obs/npobs) | |
fdrvec[rr] = fdr.cons | |
} | |
fdrvec[fdrvec>1]=1 | |
for(rr in nmax:2) | |
{ | |
if(fdrvec[rr]<fdrvec[rr-1]) fdrvec[rr-1] = fdrvec[rr] | |
} | |
fdrdata = data.frame(pobs=pobs[1:nmax]) | |
if(qval) fdrdata$FDR = qvalue(pobs)$qval[1:nmax] | |
fdrdata = data.frame(fdrdata,eFDR = fdrvec, eFDR.pi0 = fdrvec * pi0) | |
rownames(fdrdata) = names(pobs)[1:nmax] | |
return(fdrdata) | |
} | |
##eFDR(pveclist[[1]],pveclist[-1],nmax=20) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment