Skip to content

Instantly share code, notes, and snippets.

@chr1swallace
Created July 12, 2013 09:33
Show Gist options
  • Save chr1swallace/5983133 to your computer and use it in GitHub Desktop.
Save chr1swallace/5983133 to your computer and use it in GitHub Desktop.
Mary's coloc.bayes.3t
coloc.bayes.3t <- function(df1,snps=setdiff(colnames(df1),response),response="Y",priors=list(rep(1,14)),r2.trim=0.99,thr=0.005,quiet=TRUE) {
#we consider all models which contain at most 1 snp for each of the three traits
snps <- unique(snps)
n.orig <- length(snps)
if(n.orig<2)
return(1)
prep <- prepare.df(df1, snps, r2.trim=r2.trim, dataset=1, quiet=quiet)
df1 <- prep$df
snps <- prep$snps
if(!quiet)
cat("Dropped",n.orig - length(snps),"of",n.orig,"SNPs due to LD: r2 >",r2.trim,"\n",length(snps),"SNPs remain.\n")
## remove any completely predictive SNPs
f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
capture.output(lm1 <- multinom(f1, data=df1,maxit=1000))
while(any(is.na(coefficients(lm1)))) {
drop <- which(is.na(coefficients(lm1))[-1])
if(!quiet)
cat("Dropping",length(drop),"inestimable SNPs (most likely due to colinearity):\n",drop,"\n")
snps <- snps[-drop]
f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
capture.output(lm1 <- multinom(f1, data=df1,maxit=1000))
}
n.clean <- length(snps)
#remove SNPs with low posterior probabilities in the individual models.
f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
df.trait1<-df1[which(df1[,1] %in% c(0,1)),c("Y",snps)]
df.trait2<-df1[which(df1[,1] %in% c(0,2)),c("Y",snps)]
df.trait3<-df1[which(df1[,1] %in% c(0,2)),c("Y",snps)]
df.trait2[,1]<-df.trait2[,1]/2
df.trait3[,1]<-df.trait3[,1]/3
modelsep<-diag(n.clean)
mod.trait1<-glib(x=df.trait1[,-1], y=df.trait1[,1], error="binomial", link="logit",models=modelsep)
mod.trait2<-glib(x=df.trait2[,-1], y=df.trait2[,1], error="binomial", link="logit",models=modelsep)
mod.trait3<-glib(x=df.trait3[,-1], y=df.trait3[,1], error="binomial", link="logit",models=modelsep)
pp.trait1<-mod.trait1$bf$postprob[,2]
pp.trait2<-mod.trait2$bf$postprob[,2]
pp.trait3<-mod.trait3$bf$postprob[,2]
whichsnps<-union(union(which(pp.trait1>thr),which(pp.trait2>thr)),which(pp.trait3>thr))
cat("We consider ",length(whichsnps), " SNPs in the final analysis. \n")
snps<-snps[whichsnps]
n.clean <- length(snps)
#covert to a binomial model so we can run glib
f1 <- as.formula(paste("Y ~ 1 | ", paste(snps,collapse="+")))
binmod<-mlogit2logit(f1,data=df1,choices=0:3,base.choice = 1)$data
index<-grep("z",colnames(binmod))
binX<-binmod[,index]
#remove z_1 (we do not want it in our model matrix)
binX<-binX[,-1]
#extract the new reponse
binY<-binmod[,"Y.star"]
models<-makebinmod.3t(n.clean)
category<-apply(models,1,whichcat.3t)
#run glib
mods1 <- glib(x=binX, y=binY, error="binomial", link="logit",models=models)
#-2 log the bf with a flat prior
twologB10=mods1$bf$twologB10[,2]
logbf<-rep(0,14)
for (ii in 1:14){
logbf[ii]<-logsum(0.5*twologB10[which(category==(ii-1))])
}
postprobs<-vector("list",length(priors))
for (ii in 1:length(priors)){
prior<-priors[[ii]]
postlogbf<-logbf+log(prior)
postprob<-(exp(postlogbf))
postprob<-postprob/sum(postprob)
cat("for prior: ", prior/sum(prior), "\n")
cat("we have posterior: ",postprob, "\n")
cat("PP for shared variant is: ", 100*postprob[14], "% \n")
cat("--- \n")
postprobs[[ii]]=postprob
}
return(postprobs)
}
makebinmod.3t<-function(p){
#returns a model matrix for the binomial equivalent model
#p=number of snps present
snplist<-vector("list",3)
for (ii in 1:3){snplist[[ii]]=0:p}
whichsnps<-expand.grid(snplist)
whichsnps.t1<-whichsnps[,1]
whichsnps.t2<-whichsnps[,2]
whichsnps.t3<-whichsnps[,3]
nummod<-nrow(whichsnps)
models.t1<-matrix(0,nummod,p)
models.t2<-matrix(0,nummod,p)
models.t3<-matrix(0,nummod,p)
for (ii in 1:nummod){
if (whichsnps.t1[ii]>0){
models.t1[ii,whichsnps.t1[ii]]=1
}
if (whichsnps.t2[ii]>0){
models.t2[ii,whichsnps.t2[ii]]=1
}
if (whichsnps.t3[ii]>0){
models.t3[ii,whichsnps.t3[ii]]=1
}
}
models<-cbind(models.t1,models.t2,rep(1,nummod),models.t3,rep(1,nummod))
return(models)
}
mywhich <- function(tline) {
wh <- which(line==1)
if(length(wh)==0)
wh <- 0
return(wh)
}
divide.hyps <- function(t1,t2,t3) {
if (t1!=t2 & t2!=t3 & t3!=t1)
return(10)
if (t1==t2 & t1==t3)
return(14)
## now know there must be exactly one matching pair
if (t1==t2)
return(11)
if (t1==t3)
return(12)
if (t2==t3)
return(13)
}
whichcat.3t<-function(line){
#puts the model given in line into one of the twelve categories
#assumes m=1
p<-(length(line)-2)/3
tline1<-line[1:p]
tline2<-line[(p+1):(2*p)]
tline3<-line[((2*p)+2):((3*p)+1)]
t1 <- mywhich(tline1)
t2 <- mywhich(tline2)
t3 <- mywhich(tline3)
## ttt indicates whether t. is 0 or >0 only
ttt <- paste0(min(t1,1), min(t2,1), min(t3,1))
ret <- switch(ttt,
"000"=0,
"100"=1,
"010"=2,
"001"=3,
"110"= if(t1==t2) { 7 } else { 4 },
"101"= if(t1==t3) { 8 } else { 5 },
"011"= if(t2==t3) { 9 } else { 6 },
"111"= divide.hyps(t1,t2,t3))
if(is.null(ret))
error("unrecognised category in whichcat.3t")
return(ret)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment