Created
July 20, 2017 14:12
-
-
Save chr1swallace/cad8dc86f4acb5b69dbe8ab8a9c1f32e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
library(parallel) | |
library(annotSnpStats) | |
library(snpStats) | |
library(GUESSFM) | |
## | |
inv.logit.fn <-function(x) return(exp(x)/(1+exp(x))) | |
## | |
phen.gen.fn <-function(beta1=c(-2.3,.2,.2),beta2=c(-2.3,.2,.2),snpG,N0=100,N1=100,N2=100,causals1.ind,causals2.ind) { | |
# beta[1] = log(prev) | |
N <-dim(snpG)[2] # number of indivs | |
causal1 <- snpG[causals1.ind,] | |
causal2 <- snpG[causals2.ind,] | |
n0=0 | |
n1=0 | |
n2=0 | |
G0 <-NULL | |
G1 <- NULL | |
G2 <- NULL | |
j=1 | |
while( (n0<=N0 | n1 <=N1 | n2 <=N2) & (j <= N) ) { | |
p1 <-inv.logit.fn(beta1[1]+sum(beta1[-1]*causal1[,j])) | |
p2 <-inv.logit.fn(beta2[1]+sum(beta2[-1]*causal2[,j])) | |
u<-runif(2) | |
if(u[1]>p1 & u[1]>p2) { | |
n0<-n0 + 1 | |
G0 <-rbind(G0,snpG[,j]) | |
} else if(u[1]<=p1 & u[1]>p2) { | |
n1<-n1 + 1 | |
G1 <-rbind(G1,snpG[,j]) | |
} else if(u[1]<=p2 & u[1]>p1) { | |
n2<-n2 + 1 | |
G2 <-rbind(G2,snpG[,j]) | |
} else { | |
if(u[2]<0.5 & n1<N1 ) { n1<-n1 + 1; G1 <-rbind(G1,snpG[,j]) | |
} else { n2<-n2 + 1; G2 <-rbind(G2,snpG[,j]) } | |
} | |
j <- j+1 | |
} | |
G<-rbind(G0[1:N0,],G1[1:N1,],G2[1:N2,]) | |
rownames(G)<-c(paste("control.",1:N0,sep=""),paste("case1.",1:N1,sep=""),paste("case2.",1:N2,sep="")) | |
colnames(G) <- rownames(snpG) | |
return(list(G=G,y=c(rep(0,N0),rep(1,N1),rep(2,N2)) )) | |
} | |
#### | |
format.mod.fn <- function(k,out) { | |
ind <- which(out[k,]==1) | |
if(length(ind)>0) { | |
mod <- paste(names(out[k,][ind]),sep="",collapse="%") | |
} else {mod <- "0"} | |
return(mod) | |
} | |
##### | |
#' @title Calculate joint PP at tag SNP models and then expanded models | |
#' @param t1snp.data a SnpMatrix object for trait 1 | |
#' @param t1pheno a phenotype vector for trait 1 | |
#' @param t2snp.data a SnpMatrix object for trait 2 | |
#' @param t2pheno a phenotype vector for trait 2 | |
#' @param tags tag SNPs from common controls | |
#' @param mppthr threshold for "best" SNPs MPP | |
#' @param mT1 minimum model size for trait 1 | |
#' @param MT1 maximum model size for trait 1 | |
#' @param mT2 minimum model size for trait 2 | |
#' @param MT2 maximum model size for trait 2 | |
#' @return a list with components pp (data.frame of joint PP, prior, logprior, logBF, and for each trait, model, logBF, model size) and logBF0, the offset term needed to calculate joint BFs | |
#' @export | |
sharedmPP.fn <- function(kappa,t1snp.data,t1pheno,t2snp.data,t2pheno, | |
tags,mppthr,mT1,MT1,mT2,MT2, | |
trait1="T1",trait2="T2",mydir) { | |
#' find "best" tag snps | |
## t1snps <- gfm.sel.snps.fn(snpG=t1snp.data,y=t1pheno,tags=tags,mppthr=mppthr,mydir) | |
## t2snps <- gfm.sel.snps.fn(snpG=t2snp.data,y=t2pheno,tags=tags,mppthr=mppthr,mydir) | |
## msnps <- union(t1snps[,"var"],t2snps[,"var"]) | |
## s <- length(msnps) | |
msnps <- unique(tags(tags)) | |
#s <- length(msnps) | |
#' generate all models from tag snps | |
T1mod <- T1mods.fn(mT1,MT1,msnps) | |
T2mod <- T1mods.fn(mT2,MT2,msnps) | |
G1.mat <- as(t1snp.data,"numeric") | |
data1 <- data.frame(Y=t1pheno,G1.mat) | |
G2.mat <- as(t2snp.data,"numeric") | |
data2 <- data.frame(Y=t2pheno,G2.mat) | |
## bf1 <- abf.calc(y=data1[,1],x=data1[,-1],models=as.character(T1mod$mod),family="binomial")[[1]] | |
## bf2 <- abf.calc(y=data2[,1],x=data2[,-1],models=as.character(T2mod$mod),family="binomial")[[1]] | |
D1 <- data.frame(mod=T1mod$mod,size=T1mod$size)# ,logbf=bf1$lBF) | |
D2 <- data.frame(mod=T2mod$mod,size=T2mod$size)# ,logbf=bf2$lBF) | |
e1 <- expand.tags.bf(D1,tags) | |
e2 <- expand.tags.bf(D2,tags) | |
bf1 <- abf.calc(y=data1[,1],x=data1[,-1],models=e1$str,family="binomial")[[1]] | |
bf2 <- abf.calc(y=data2[,1],x=data2[,-1],models=e2$str,family="binomial")[[1]] | |
STR <- list(e1$str,e2$str) | |
ABF <- list(bf1$lBF,bf2$lBF) | |
pr1 <- prior.bin.fn(e1$size) | |
pr2 <- prior.bin.fn(e2$size) | |
pr <- list(pr1,pr2) | |
p0 <- prior.bin.fn(0) | |
mpp <- marginalpp(STR, ABF, pr, kappa, p0) | |
pp1 <- t(mpp$shared.pp[[1]]) | |
rownames(pp1) <- mpp$STR[[1]] | |
colnames(pp1) <- paste("pp",kappa,sep=".") | |
pp2 <- t(mpp$shared.pp[[2]]) | |
rownames(pp2) <- mpp$STR[[2]] | |
colnames(pp2) <- paste("pp",kappa,sep=".") | |
return(list(pp1=pp1,pp2=pp2)) | |
} | |
### | |
PP2collapse.fn <- function(PP) { | |
b <- data.frame(matrix(0,nrow=8,ncol=10)) | |
b[,1] <- c("rs12722563","rs12722522","rs12722508","rs7909519","rs61839660","rs12722496","rs12722495","rs41295049") | |
b[,2] <- "rs2104286" | |
b[,3] <- c("rs11597367","rs35285258","rs11594656",rep("rs",5)) | |
b[,4] <- c("rs3118475","rs62626317","rs41295055","rs62626325","rs41295105","rs56382813","rs41295079","rs") | |
b[,5] <- "rs6602437" | |
b[,6] <- c("rs41295121","rs41295159",rep("rs",6)) | |
b[,7] <- "rs706779" # AITD | |
b[,8] <- "rs3118470" # AA | |
b[,9] <- "rs4147359" # UC | |
b[,10] <- "rs10795791" # RA | |
m <- length(PP) | |
check <- NULL | |
for(i in 1:m) { | |
bsnp <- unlist(strsplit(as.character(names(PP)[i]),"%")) | |
checki <- numeric(10) | |
for(j in 1:10) checki[j] <- 1*(length(intersect(b[,j],bsnp))>0) | |
check <- rbind(check,checki) | |
} | |
gnames <- c("A","B","C","D","E","F","AITD","AA","UC","RA") | |
colnames(check) <- gnames | |
rownames(check)<-NULL | |
mod.names.fn <- function(k,check) {paste(colnames(check)[which(check[k,]>0)],collapse="-")} | |
k.mat <- matrix(1:m,ncol=1) | |
mod.names <- apply(k.mat,1,mod.names.fn,check) | |
c2 <- combn(1:10,2,simplify=TRUE) | |
c2names <- apply(combn(gnames,2,simplify=TRUE),2,name.fn <- function(cc) return(paste(cc,collapse="."))) | |
c3 <- combn(1:10,3,simplify=TRUE) | |
c3names <- apply(combn(gnames,3,simplify=TRUE),2,name.fn <- function(cc) return(paste(cc,collapse="."))) | |
m2 <- matrix(0,nrow=length(c2names),ncol=10); for(i in 1:length(c2names)) { m2[i,c2[,i]] <- 1 } | |
m3 <- matrix(0,nrow=length(c3names),ncol=10); for(i in 1:length(c3names)) { m3[i,c3[,i]] <- 1 } | |
m1 <- diag(10) | |
M <- rbind(rep(0,10),m1,m2,m3) | |
c1names <- gnames | |
rownames(M) <- c("m0",c1names,c2names,c3names) | |
colnames(M) <-c("A","B","C","D","E","F","AITD","AA","UC","RA") | |
out <- data.frame(PP=PP,check) | |
names(out)[2:11] <- gnames | |
outPP <- as.data.frame(matrix(0,nrow = 1, ncol = dim(M)[1],dimnames = list(NULL, rownames(M)))) | |
indr <- NULL | |
ind <- 1:m | |
r <- 0 | |
indr <- NULL | |
while(length(ind)>0 & r<dim(outPP)[2]) { | |
while(length(indr)==0) { | |
r <- r+1; #print(c("new r",r)) | |
indr <- which(apply(out[,-1], 1, function(x) all(x == M[r,])) ) | |
rM <- rownames(M)[r] | |
} | |
outPP[,rM] <- sum(PP[indr]) | |
ind <- setdiff(ind,indr) | |
indr <- NULL | |
} | |
# write.table(outPP,fname1,quote=FALSE,row.names=FALSE,col.names=TRUE) | |
return(outPP) | |
} | |
## | |
prior.bin.fn <- function(nT1,s=605,mT1=3) { | |
dbinom(nT1,size=s,prob=mT1/s)/choose(s,nT1) | |
} | |
### | |
expand.tags.bf <- function(best, tags) { | |
bsnps <- unique(unlist(strsplit(as.character(best$mod),"%"))) | |
check <- which(bsnps=="0" | bsnps=="1") | |
if(length(check)>0) bsnps <- bsnps[-check] | |
B <- dim(best)[1] | |
wh <- which(make.names(tags(tags)) %in% bsnps) | |
if (!length(wh)) | |
stop("none of the supplied tags are amongst the best SNPs in d") | |
proxies <- split(make.names(snps(tags)[wh]), make.names(tags(tags)[wh])) | |
if (!all(bsnps %in% names(proxies))) | |
stop("not all model SNPs found in tags object") | |
message("expanding tags for ", B, " models over ", length(proxies), | |
" tag SNPs, tagging a total of ", length(unlist(proxies)), | |
" SNPs.") | |
pm <- mclapply(as.list(1:B), function(i) { | |
if (best[i, "size"] == 0) { | |
pm.str <- "" | |
} | |
else { | |
pm.str <- apply(do.call(expand.grid, proxies[unlist(strsplit(as.character(best$mod[i]),"%"))]), | |
1, makestr) | |
} | |
gc() | |
return(pm.str) | |
}) | |
npm <- sapply(pm, length) | |
neighb <- as.data.table(best)[rep(1:length(pm), times = npm), | |
] | |
setnames(neighb, sub("str", "index.str", names(neighb))) | |
#neighb[, `:=`(str, unlist(pm))] | |
#neighb <- neighb[order(neighb$logbf, decreasing = TRUE), ] | |
#neighb[, `:=`(rank, 1:nrow(neighb))] | |
best <- as.data.frame(neighb) | |
return(best) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment