chr1swallace/coloc.bayes.3t.mdf.R

## coloc.bayes.3t.mdf.R
coloc.bayes.3t <- function(df1,snps=setdiff(colnames(df1),response),response="Y",priors=list(rep(1,14)),r2.trim=0.99,thr=0.005,quiet=TRUE) {
    #we consider all models which contain at most 1 snp for each of the three traits
    snps <- unique(snps)
    n.orig <- length(snps)
    if(n.orig<2)
        return(1)
    prep <- prepare.df(df1, snps, r2.trim=r2.trim, dataset=1, quiet=quiet)
    df1 <- prep$df
    snps <- prep$snps

    if(!quiet)
        cat("Dropped",n.orig - length(snps),"of",n.orig,"SNPs due to LD: r2 >",r2.trim,"\n",length(snps),"SNPs remain.\n")

    ## remove any completely predictive SNPs
    f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
    capture.output(lm1 <- multinom(f1, data=df1,maxit=1000))
    while(any(is.na(coefficients(lm1)))) {
        drop <- which(is.na(coefficients(lm1))[-1])
        if(!quiet)
            cat("Dropping",length(drop),"inestimable SNPs (most likely due to colinearity):\n",drop,"\n")
        snps <- snps[-drop]
        f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
        capture.output(lm1 <- multinom(f1, data=df1,maxit=1000))
    }
    n.clean <- length(snps)
    #remove SNPs with low posterior probabilities in the individual models.
    f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
    df.trait1<-df1[which(df1[,1] %in% c(0,1)),c("Y",snps)]
    df.trait2<-df1[which(df1[,1] %in% c(0,2)),c("Y",snps)]
    df.trait3<-df1[which(df1[,1] %in% c(0,2)),c("Y",snps)]
    df.trait2[,1]<-df.trait2[,1]/2
    df.trait3[,1]<-df.trait3[,1]/3
    modelsep<-diag(n.clean)
    mod.trait1<-glib(x=df.trait1[,-1], y=df.trait1[,1], error="binomial", link="logit",models=modelsep)
    mod.trait2<-glib(x=df.trait2[,-1], y=df.trait2[,1], error="binomial", link="logit",models=modelsep)
    mod.trait3<-glib(x=df.trait3[,-1], y=df.trait3[,1], error="binomial", link="logit",models=modelsep)
    pp.trait1<-mod.trait1$bf$postprob[,2]
    pp.trait2<-mod.trait2$bf$postprob[,2]
    pp.trait3<-mod.trait3$bf$postprob[,2]
    whichsnps<-union(union(which(pp.trait1>thr),which(pp.trait2>thr)),which(pp.trait3>thr))
    cat("We consider ",length(whichsnps), " SNPs in the final analysis. \n")
    snps<-snps[whichsnps]

    n.clean <- length(snps)
    #covert to a binomial model so we can run glib
    f1 <- as.formula(paste("Y ~ 1 | ", paste(snps,collapse="+")))
    binmod<-mlogit2logit(f1,data=df1,choices=0:3,base.choice = 1)$data
    index<-grep("z",colnames(binmod))
    binX<-binmod[,index]
    #remove z_1 (we do not want it in our model matrix)
    binX<-binX[,-1]
    #extract the new reponse
    binY<-binmod[,"Y.star"]
    models<-makebinmod.3t(n.clean)
    category<-apply(models,1,whichcat.3t)

    #run glib
    mods1 <- glib(x=binX, y=binY, error="binomial", link="logit",models=models)
    #-2 log the bf with a flat prior
    twologB10=mods1$bf$twologB10[,2]
    logbf<-rep(0,14)
    for (ii in 1:14){
        logbf[ii]<-logsum(0.5*twologB10[which(category==(ii-1))])
    }
    postprobs<-vector("list",length(priors))
    for (ii in 1:length(priors)){
        prior<-priors[[ii]]
        postlogbf<-logbf+log(prior)
        postprob<-(exp(postlogbf))
        postprob<-postprob/sum(postprob)
        cat("for prior: ", prior/sum(prior), "\n")
        cat("we have posterior: ",postprob, "\n")
        cat("PP for shared variant is: ", 100*postprob[14], "% \n")
        cat("--- \n")
        postprobs[[ii]]=postprob
    }
    return(postprobs)
}

makebinmod.3t<-function(p){
    #returns a model matrix for the binomial equivalent model
    #p=number of snps present
    snplist<-vector("list",3)
    for (ii in 1:3){snplist[[ii]]=0:p}
    whichsnps<-expand.grid(snplist)
    whichsnps.t1<-whichsnps[,1]
    whichsnps.t2<-whichsnps[,2]
    whichsnps.t3<-whichsnps[,3]
    nummod<-nrow(whichsnps)
    models.t1<-matrix(0,nummod,p)
    models.t2<-matrix(0,nummod,p)
    models.t3<-matrix(0,nummod,p)
    for (ii in 1:nummod){
        if (whichsnps.t1[ii]>0){
            models.t1[ii,whichsnps.t1[ii]]=1
        }
        if (whichsnps.t2[ii]>0){
            models.t2[ii,whichsnps.t2[ii]]=1
        }
        if (whichsnps.t3[ii]>0){
            models.t3[ii,whichsnps.t3[ii]]=1
        }
    }
    models<-cbind(models.t1,models.t2,rep(1,nummod),models.t3,rep(1,nummod))
    return(models)
}

mywhich <- function(tline) {
  wh <- which(line==1)
  if(length(wh)==0)
    wh <- 0
  return(wh)
}

divide.hyps <- function(t1,t2,t3) {
  if (t1!=t2 & t2!=t3 & t3!=t1)
    return(10)
  if (t1==t2 & t1==t3)
    return(14)
  ## now know there must be exactly one matching pair
  if (t1==t2)
    return(11)
  if (t1==t3)
    return(12)
  if (t2==t3)
    return(13)
}

whichcat.3t<-function(line){
    #puts the model given in line into one of the twelve categories
    #assumes m=1
    p<-(length(line)-2)/3
    tline1<-line[1:p]
    tline2<-line[(p+1):(2*p)]
    tline3<-line[((2*p)+2):((3*p)+1)]

    t1 <- mywhich(tline1)
    t2 <- mywhich(tline2)
    t3 <- mywhich(tline3)
    ## ttt indicates whether t. is 0 or >0 only
    ttt <- paste0(min(t1,1), min(t2,1), min(t3,1))

    ret <- switch(ttt,
           "000"=0,
           "100"=1,
           "010"=2,
           "001"=3,
           "110"= if(t1==t2) { 7 } else { 4 },
           "101"= if(t1==t3) { 8 } else { 5 },
           "011"= if(t2==t3) { 9 } else { 6 },
           "111"= divide.hyps(t1,t2,t3))
    if(is.null(ret))
      error("unrecognised category in whichcat.3t")
    return(ret)
  }
	coloc.bayes.3t <- function(df1,snps=setdiff(colnames(df1),response),response="Y",priors=list(rep(1,14)),r2.trim=0.99,thr=0.005,quiet=TRUE) {
	#we consider all models which contain at most 1 snp for each of the three traits
	snps <- unique(snps)
	n.orig <- length(snps)
	if(n.orig<2)
	return(1)
	prep <- prepare.df(df1, snps, r2.trim=r2.trim, dataset=1, quiet=quiet)
	df1 <- prep$df
	snps <- prep$snps

	if(!quiet)
	cat("Dropped",n.orig - length(snps),"of",n.orig,"SNPs due to LD: r2 >",r2.trim,"\n",length(snps),"SNPs remain.\n")

	## remove any completely predictive SNPs
	f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
	capture.output(lm1 <- multinom(f1, data=df1,maxit=1000))
	while(any(is.na(coefficients(lm1)))) {
	drop <- which(is.na(coefficients(lm1))[-1])
	if(!quiet)
	cat("Dropping",length(drop),"inestimable SNPs (most likely due to colinearity):\n",drop,"\n")
	snps <- snps[-drop]
	f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
	capture.output(lm1 <- multinom(f1, data=df1,maxit=1000))
	}
	n.clean <- length(snps)
	#remove SNPs with low posterior probabilities in the individual models.
	f1 <- as.formula(paste("Y ~", paste(snps,collapse="+")))
	df.trait1<-df1[which(df1[,1] %in% c(0,1)),c("Y",snps)]
	df.trait2<-df1[which(df1[,1] %in% c(0,2)),c("Y",snps)]
	df.trait3<-df1[which(df1[,1] %in% c(0,2)),c("Y",snps)]
	df.trait2[,1]<-df.trait2[,1]/2
	df.trait3[,1]<-df.trait3[,1]/3
	modelsep<-diag(n.clean)
	mod.trait1<-glib(x=df.trait1[,-1], y=df.trait1[,1], error="binomial", link="logit",models=modelsep)
	mod.trait2<-glib(x=df.trait2[,-1], y=df.trait2[,1], error="binomial", link="logit",models=modelsep)
	mod.trait3<-glib(x=df.trait3[,-1], y=df.trait3[,1], error="binomial", link="logit",models=modelsep)
	pp.trait1<-mod.trait1$bf$postprob[,2]
	pp.trait2<-mod.trait2$bf$postprob[,2]
	pp.trait3<-mod.trait3$bf$postprob[,2]
	whichsnps<-union(union(which(pp.trait1>thr),which(pp.trait2>thr)),which(pp.trait3>thr))
	cat("We consider ",length(whichsnps), " SNPs in the final analysis. \n")
	snps<-snps[whichsnps]

	n.clean <- length(snps)
	#covert to a binomial model so we can run glib
	f1 <- as.formula(paste("Y ~ 1 \| ", paste(snps,collapse="+")))
	binmod<-mlogit2logit(f1,data=df1,choices=0:3,base.choice = 1)$data
	index<-grep("z",colnames(binmod))
	binX<-binmod[,index]
	#remove z_1 (we do not want it in our model matrix)
	binX<-binX[,-1]
	#extract the new reponse
	binY<-binmod[,"Y.star"]
	models<-makebinmod.3t(n.clean)
	category<-apply(models,1,whichcat.3t)

	#run glib
	mods1 <- glib(x=binX, y=binY, error="binomial", link="logit",models=models)
	#-2 log the bf with a flat prior
	twologB10=mods1$bf$twologB10[,2]
	logbf<-rep(0,14)
	for (ii in 1:14){
	logbf[ii]<-logsum(0.5*twologB10[which(category==(ii-1))])
	}
	postprobs<-vector("list",length(priors))
	for (ii in 1:length(priors)){
	prior<-priors[[ii]]
	postlogbf<-logbf+log(prior)
	postprob<-(exp(postlogbf))
	postprob<-postprob/sum(postprob)
	cat("for prior: ", prior/sum(prior), "\n")
	cat("we have posterior: ",postprob, "\n")
	cat("PP for shared variant is: ", 100*postprob[14], "% \n")
	cat("--- \n")
	postprobs[[ii]]=postprob
	}
	return(postprobs)
	}

	makebinmod.3t<-function(p){
	#returns a model matrix for the binomial equivalent model
	#p=number of snps present
	snplist<-vector("list",3)
	for (ii in 1:3){snplist[[ii]]=0:p}
	whichsnps<-expand.grid(snplist)
	whichsnps.t1<-whichsnps[,1]
	whichsnps.t2<-whichsnps[,2]
	whichsnps.t3<-whichsnps[,3]
	nummod<-nrow(whichsnps)
	models.t1<-matrix(0,nummod,p)
	models.t2<-matrix(0,nummod,p)
	models.t3<-matrix(0,nummod,p)
	for (ii in 1:nummod){
	if (whichsnps.t1[ii]>0){
	models.t1[ii,whichsnps.t1[ii]]=1
	}
	if (whichsnps.t2[ii]>0){
	models.t2[ii,whichsnps.t2[ii]]=1
	}
	if (whichsnps.t3[ii]>0){
	models.t3[ii,whichsnps.t3[ii]]=1
	}
	}
	models<-cbind(models.t1,models.t2,rep(1,nummod),models.t3,rep(1,nummod))
	return(models)
	}

	mywhich <- function(tline) {
	wh <- which(line==1)
	if(length(wh)==0)
	wh <- 0
	return(wh)
	}

	divide.hyps <- function(t1,t2,t3) {
	if (t1!=t2 & t2!=t3 & t3!=t1)
	return(10)
	if (t1==t2 & t1==t3)
	return(14)
	## now know there must be exactly one matching pair
	if (t1==t2)
	return(11)
	if (t1==t3)
	return(12)
	if (t2==t3)
	return(13)
	}

	whichcat.3t<-function(line){
	#puts the model given in line into one of the twelve categories
	#assumes m=1
	p<-(length(line)-2)/3
	tline1<-line[1:p]
	tline2<-line[(p+1):(2*p)]
	tline3<-line[((2p)+2):((3p)+1)]

	t1 <- mywhich(tline1)
	t2 <- mywhich(tline2)
	t3 <- mywhich(tline3)
	## ttt indicates whether t. is 0 or >0 only
	ttt <- paste0(min(t1,1), min(t2,1), min(t3,1))

	ret <- switch(ttt,
	"000"=0,
	"100"=1,
	"010"=2,
	"001"=3,
	"110"= if(t1==t2) { 7 } else { 4 },
	"101"= if(t1==t3) { 8 } else { 5 },
	"011"= if(t2==t3) { 9 } else { 6 },
	"111"= divide.hyps(t1,t2,t3))
	if(is.null(ret))
	error("unrecognised category in whichcat.3t")
	return(ret)
	}