Create a gist now

Instantly share code, notes, and snippets.

@hakyim /eFDR.r
Created Nov 19, 2014

What would you like to do?
eFDR: compute empirical FDR based on observed vector of p values and empirical null p values computed using simulated phenotype
## May 7, 2012
## Hae Kyung Im
## haky@uchicago.edu
##
## Department of Health Studies
## Biostatistics Laboratory
## University of Chicago
## Please cite:
## Eric R Gamazon, R. Stephanie Huang, Eileen Dolan, Nancy Cox, and Hae Kyung Im, (2012)
## Integrative Genomics: Quantifying significance of phenotype-genotype
## relationships from multiple sources of high-throughput data
## Frontiers of Genetics - under review
## eFDR
## compute empirical FDR based on observed vector of p values
## and null p values computed using simulated phenotype
## usage
## eFDR(pobs,pnulllist,nmax=20)
## arguments:
## pobs: vector of observed p values
## names(pobs) should be the names of the SNPs
## if no names, they will be labelled SNP1...SNPn
## pnulllist: list with simulated null pvalues
## nmax is the maximum number of top snps for which FDR will be computed
## the larger this number the slower the computation will be
## value: will return pvalue
library(qvalue)
eFDR = function(pobs,plist,nmax=100,qval=T)
{
if(is.matrix(plist)) pnullmat = plist else
if(is.data.frame(plist)) pnullmat = as.matrix(plist) else
if(is.list(plist)) pnullmat = plist2pmat(pobs,plist)
if(is.null(names(pobs)))
{
warning("Observed p vector, pobs, is not labelled - labelling SNP1..n")
names(pobs) = "SNP" %&% (1:length(pobs))
}
fdrdata = eFDR.mat(pobs,pnullmat,nmax,qval)
return(fdrdata)
}
## turns list of pnull to matrix pnullmat
plist2pmat = function(pobs,pnulllist)
{
maxnrow = max(unlist(lapply(pnulllist,length)))
maxnrow = max(maxnrow,pobs)
nsim = length(pnulllist)
pnullmat = matrix(NA,maxnrow,nsim)
for(cc in 1:nsim)
{
pvec = pveclist[[cc]]
pnullmat[1:length(pvec),cc] = pvec
}
return(pnullmat)
}
##
eFDR.mat = function(pobs,pnullmat,nmax,qval)
{
npobs = sum(!is.na(pobs))
nnull = sum(!is.na(pnullmat))
lambda = median(pobs,na.rm=T) ## lambda is used to estimate pi0
pi0 = ( sum(pobs>lambda,na.rm=T) / npobs ) /
( sum(pnullmat>lambda,na.rm=T) / nnull )
pi0 = min(pi0,1)
print(pi0)
fdrvec = rep(NA,nmax)
for(rr in 1:nmax)
{
gamma = pobs[rr]
npless.null = sum(pnullmat<=gamma,na.rm=T)
npless.obs = sum(pobs<=gamma,na.rm=T)
fdr.cons = (npless.null/nnull) / (npless.obs/npobs)
fdrvec[rr] = fdr.cons
}
fdrvec[fdrvec>1]=1
for(rr in nmax:2)
{
if(fdrvec[rr]<fdrvec[rr-1]) fdrvec[rr-1] = fdrvec[rr]
}
fdrdata = data.frame(pobs=pobs[1:nmax])
if(qval) fdrdata$FDR = qvalue(pobs)$qval[1:nmax]
fdrdata = data.frame(fdrdata,eFDR = fdrvec, eFDR.pi0 = fdrvec * pi0)
rownames(fdrdata) = names(pobs)[1:nmax]
return(fdrdata)
}
##eFDR(pveclist[[1]],pveclist[-1],nmax=20)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment