Skip to content

Instantly share code, notes, and snippets.

@chr1swallace
Created July 19, 2017 09:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chr1swallace/6ae120dc0df70b67686516ef7558c64b to your computer and use it in GitHub Desktop.
Save chr1swallace/6ae120dc0df70b67686516ef7558c64b to your computer and use it in GitHub Desktop.
## sharing parameter, largish, so we expect to see a big enough difference to check if working
S <- 100
prior.bin.fn <- function(nT1,s=100,mT1=3) {
dbinom(nT1,size=s,prob=mT1/s)/choose(s,nT1)
}
bothpp <- function(data) {
ss1 <- strsplit(data$t1.str,"%")
ss2 <- strsplit(data$t2.str,"%")
data$overlap <- sapply(1:nrow(data), function(i)
length(intersect(ss1[[i]],ss2[[i]]))>0)
data$apr <- prior.bin.fn(data$t1.size) * prior.bin.fn(data$t2.size) * ifelse(data$overlap,S,1)
data$apr <- data$apr/sum(data$apr)
data$pp <- MTFM:::calcpp(data$apr,data$t1t2.logbf)
data$rpr <- prior.bin.fn(data$t1.size) * prior.bin.fn(data$t2.size)
data$rpr <- data$rpr/sum(data$rpr)
data$rpp <- MTFM:::calcpp(data$rpr,data$t1t2.logbf)
return(data)
}
makeM <- function(str,usnps) {
M <- matrix(0,length(str),length(usnps),dimnames=list(NULL,usnps))
ss <- strsplit(str,"%")
for(i in seq_along(ss))
M[i,ss[[i]]] <- 1
return(M)
}
library(MTFM)
library(data.table)
margpp <- function(data) {
dt <- as.data.table(data)
d1 <- (dt[,.(t1.logbf=unique(t1.logbf),t1.size=unique(t1.size),pp.orig=sum(pp),rpp.orig=sum(rpp)),
by=c("t1.str")])
d2 <- (dt[,.(t2.logbf=unique(t2.logbf),t2.size=unique(t2.size),pp.orig=sum(pp),rpp.orig=sum(rpp)),
by=c("t2.str")])
setnames(d1,c("str","logbf","size","spp.orig","mpp.orig"))
setnames(d2,c("str","logbf","size","spp.orig","mpp.orig"))
d1 <- unique(d1,by="str")
d2 <- unique(d2,by="str")
d1$pr <- prior.bin.fn(d1$size)
d2$pr <- prior.bin.fn(d2$size)
d1$mpp.direct <- MTFM:::calcpp(d1$pr,d1$logbf)
d2$mpp.direct <- MTFM:::calcpp(d2$pr,d2$logbf)
ss1 <- strsplit(data$t1.str,"%")
ss2 <- strsplit(data$t2.str,"%")
usnps <- unique(unlist(c(ss1,ss2)))
length(usnps)
M1 <- makeM(d1$str,usnps)
M2 <- makeM(d2$str,usnps)
PP <- marginalpp2(M1,M2,d1$logbf,d2$logbf,d1$pr,d2$pr,S,prior.bin.fn(0))
d1$spp.new <- as.numeric(PP$shared.pp1)[-1]
d1$mpp.new <- as.numeric(PP$single.pp1)[-1]
d2$spp.new <- as.numeric(PP$shared.pp2)[-1]
d2$mpp.new <- as.numeric(PP$single.pp2)[-1]
return(list(d1=d1,d2=d2))
}
## load data
data <- read.table("/scratch/wallace/IL2RA/AD-AC-rep_100-bf-final.txt",header=TRUE,as.is=TRUE)
## analyse
data <- bothpp(data)
marg <- margpp(data)
head(marg$d1[,.(str,mpp.orig,mpp.direct,mpp.new)])
head(marg$d1[,.(str,spp.orig,spp.new)])
## mpp (marginal, unshared) matches, but not aggregating over orig
## spp (marginal, shared) doesn't match
nrow(data)
nrow(marg$d1) * nrow(marg$d2)
## is it because data has only a subset of possible model pairs?
adata <- expand.grid(1:nrow(d1),1:nrow(d2))
adata <- cbind(adata,marg$d1[adata$Var1,.(str,logbf,size)])
adata <- cbind(adata,marg$d2[adata$Var2,.(str,logbf,size)])
colnames(adata)[-c(1:2)] <- c("t1.str","t1.logbf","t1.size","t2.str","t2.logbf","t2.size")
adata$t1t2.logbf <- adata$t1.logbf + adata$t2.logbf - 46.6
## now perform same analysis as above
adata <- bothpp(adata)
amarg <- margpp(adata)
head(amarg$d1[,.(str,mpp.orig,mpp.direct,mpp.new)])
head(amarg$d1[,.(str,spp.orig,spp.new)])
with(amarg$d1,plot(spp.orig,spp.new))
## now mpp matches by all methods, but not spp
## write out marginalpp here in R
d1 <- amarg$d1
d2 <- amarg$d2
ss1 <- strsplit(d1$str,"%")
ss2 <- strsplit(d2$str,"%")
Qi <- numeric(nrow(d1))
Oi <- numeric(nrow(d1))
for(i in 1:nrow(d1)) {
Qi[i] <- 0
for(j in 1:nrow(d2)) {
overlap <- length(intersect(ss1[[i]],ss2[[j]]))
if(overlap>0)
Qi[i] <- Qi[i] + d2$mpp.new[[j]]
else
Oi[i] <- Oi[i] + d2$mpp.new[[j]]
}
}
d1$alt2.pr <- d1$pr * (Oi + S * Qi)
d1$alt2.pr <- d1$alt2.pr / sum(d1$alt2.pr)
d1$spp.alt2 <- MTFM:::calcpp(d1$alt2.pr,d1$logbf)
d1$alt.pr <- d1$pr * (1 + (S-1) * Qi)
d1$alt.pr <- d1$alt.pr / sum(d1$alt.pr)
d1$spp.alt <- MTFM:::calcpp(d1$alt.pr,d1$logbf)
head(d1[,.(str,spp.orig,spp.new,spp.alt,spp.alt2)])
## spp.alt matches spp.new, not spp.orig.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment