Created
April 15, 2021 20:02
-
-
Save mikelove/022d20f5e9b1326b3c0ff805467f827e to your computer and use it in GitHub Desktop.
Allelic ratio with inferential variance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(fishpond) | |
library(SingleCellExperiment) | |
se <- makeSimSwishData(m=20, n=10) | |
sce <- as(se, "SingleCellExperiment") | |
assayNames(sce) | |
# make up the structure we will be having: | |
rownames(sce) <- paste0("txp",1:10,"_",rep(c("M","P"),each=10)) | |
# about here it would be good to filter out any transcripts with very low expression | |
# in either species... i don't have code for that here, but we can work on that when | |
# you're ready. removing transcripts here will help to cut down on memory usage. | |
# you can also then save() the filtered object, and sometimes restarting R with a fresh | |
# session and load() will keep your memory footprint low | |
# cut to only the maternal and paternal counts by splitting the assays in half: | |
sce_mat <- sce[1:10,] | |
sce_pat <- sce[11:20,] | |
# make a empty container: | |
sce_ratio <- SingleCellExperiment(assays=list(), rowData=rowData(sce_mat), colData=colData(sce_mat)) | |
# compute ratio across bootstraps | |
# i think it may be useful to add 1 to each allele... we can reconsider later | |
inf <- function(x,i) assays(x)[[paste0("infRep",i)]] | |
for (i in 1:20) { | |
assays(sce_ratio)[[paste0("infRep",i)]] <- (inf(sce_mat,i) + 1) / (inf(sce_mat,i) + inf(sce_pat,i) + 2) | |
} | |
# examine: | |
assays(sce_ratio)[["infRep1"]][1:5,1:5] | |
# make into array, here we will need to go from sparse to dense, and this | |
# will inflate the memory usage, so we want to make the matrices as filtered | |
# as possible before this step | |
assays(sce_ratio) <- lapply(assays(sce_ratio), as.matrix) | |
ratio <- fishpond:::getInfReps(sce_ratio) | |
dim(ratio) | |
# now the code is basic R... | |
mu <- apply(ratio, 1:2, mean) | |
sd <- apply(ratio, 1:2, sd) | |
# a histogram of the global posterior mean of allelic ratio: | |
hist(colMeans(mu)) | |
# better is to weight this by inverse of posterior variance | |
# here i do a little more than that.. i'm also adding a fudge factor epsilon | |
# this is actually something we should discuss more in person... | |
eps <- colMeans(sd) | |
wts <- apply(1/(sd + eps)^2, 2, function(x) x/sum(x)) | |
# histogram of the weighted global posterior mean of allelic ratio: | |
hist(colSums(mu * wts)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment