mikelove/ratio.R

## ratio.R
library(fishpond)
library(SingleCellExperiment)
se <- makeSimSwishData(m=20, n=10)
sce <- as(se, "SingleCellExperiment")
assayNames(sce)
# make up the structure we will be having:
rownames(sce) <- paste0("txp",1:10,"_",rep(c("M","P"),each=10))

# about here it would be good to filter out any transcripts with very low expression
# in either species... i don't have code for that here, but we can work on that when
# you're ready. removing transcripts here will help to cut down on memory usage.
# you can also then save() the filtered object, and sometimes restarting R with a fresh
# session and load() will keep your memory footprint low

# cut to only the maternal and paternal counts by splitting the assays in half:
sce_mat <- sce[1:10,]
sce_pat <- sce[11:20,]

# make a empty container:
sce_ratio <- SingleCellExperiment(assays=list(), rowData=rowData(sce_mat), colData=colData(sce_mat))

# compute ratio across bootstraps
# i think it may be useful to add 1 to each allele... we can reconsider later
inf <- function(x,i) assays(x)[[paste0("infRep",i)]]
for (i in 1:20) {
  assays(sce_ratio)[[paste0("infRep",i)]] <- (inf(sce_mat,i) + 1) / (inf(sce_mat,i) + inf(sce_pat,i) + 2)
}

# examine:
assays(sce_ratio)[["infRep1"]][1:5,1:5]

# make into array, here we will need to go from sparse to dense, and this
# will inflate the memory usage, so we want to make the matrices as filtered
# as possible before this step
assays(sce_ratio) <- lapply(assays(sce_ratio), as.matrix)
ratio <- fishpond:::getInfReps(sce_ratio)
dim(ratio)

# now the code is basic R...
mu <- apply(ratio, 1:2, mean)
sd <- apply(ratio, 1:2, sd)

# a histogram of the global posterior mean of allelic ratio:
hist(colMeans(mu))

# better is to weight this by inverse of posterior variance
# here i do a little more than that.. i'm also adding a fudge factor epsilon
# this is actually something we should discuss more in person...
eps <- colMeans(sd)
wts <- apply(1/(sd + eps)^2, 2, function(x) x/sum(x))

# histogram of the weighted global posterior mean of allelic ratio:
hist(colSums(mu * wts))
	library(fishpond)
	library(SingleCellExperiment)
	se <- makeSimSwishData(m=20, n=10)
	sce <- as(se, "SingleCellExperiment")
	assayNames(sce)
	# make up the structure we will be having:
	rownames(sce) <- paste0("txp",1:10,"_",rep(c("M","P"),each=10))

	# about here it would be good to filter out any transcripts with very low expression
	# in either species... i don't have code for that here, but we can work on that when
	# you're ready. removing transcripts here will help to cut down on memory usage.
	# you can also then save() the filtered object, and sometimes restarting R with a fresh
	# session and load() will keep your memory footprint low

	# cut to only the maternal and paternal counts by splitting the assays in half:
	sce_mat <- sce[1:10,]
	sce_pat <- sce[11:20,]

	# make a empty container:
	sce_ratio <- SingleCellExperiment(assays=list(), rowData=rowData(sce_mat), colData=colData(sce_mat))

	# compute ratio across bootstraps
	# i think it may be useful to add 1 to each allele... we can reconsider later
	inf <- function(x,i) assays(x)[[paste0("infRep",i)]]
	for (i in 1:20) {
	assays(sce_ratio)[[paste0("infRep",i)]] <- (inf(sce_mat,i) + 1) / (inf(sce_mat,i) + inf(sce_pat,i) + 2)
	}

	# examine:
	assays(sce_ratio)[["infRep1"]][1:5,1:5]

	# make into array, here we will need to go from sparse to dense, and this
	# will inflate the memory usage, so we want to make the matrices as filtered
	# as possible before this step
	assays(sce_ratio) <- lapply(assays(sce_ratio), as.matrix)
	ratio <- fishpond:::getInfReps(sce_ratio)
	dim(ratio)

	# now the code is basic R...
	mu <- apply(ratio, 1:2, mean)
	sd <- apply(ratio, 1:2, sd)

	# a histogram of the global posterior mean of allelic ratio:
	hist(colMeans(mu))

	# better is to weight this by inverse of posterior variance
	# here i do a little more than that.. i'm also adding a fudge factor epsilon
	# this is actually something we should discuss more in person...
	eps <- colMeans(sd)
	wts <- apply(1/(sd + eps)^2, 2, function(x) x/sum(x))

	# histogram of the weighted global posterior mean of allelic ratio:
	hist(colSums(mu * wts))