Skip to content

Instantly share code, notes, and snippets.

@k3yavi
Last active February 11, 2021 15:29
Show Gist options
  • Save k3yavi/03e2c82330fffd97e3f2b4c09118dbab to your computer and use it in GitHub Desktop.
Save k3yavi/03e2c82330fffd97e3f2b4c09118dbab to your computer and use it in GitHub Desktop.
suppressPackageStartupMessages({
library(devtools)
library(Signac)
library(Seurat)
library(Matrix)
library(ggplot2)
library(patchwork)
library(future)
library(dplyr)
library(pbapply)
library(reshape2)
set.seed(1234)
})
fits <- list(
"Mono"=readRDS("/home/srivastavaa/test_rds/distributions.rds"),
"CD4_T"=readRDS("/home/srivastavaa/test_rds/cd4_distributions.rds")
)
# play with CEBPA
obj <- readRDS("/home/srivastavaa/v8_bingjie_cut_n_tag/seurat_objects/k4me1.rds")
region <- Signac:::FindRegion(obj, "CEBPA")
# stupid way of extracting 200 base intervals
start <- region@ranges@start
end <- region@ranges@start + region@ranges@width
start <- start %/% 200
end <- end %/% 200
qregion <- StringToGRanges( paste0(as.character(region@seqnames@values), "-",
seq(start*200, end*200, 200), "-",
seq(start*200 + 200, end*200 + 200, 200)
)
)
qregion
# extracting counts
celltype <- "CD4 T"
marks <- c("k4me1.rds", "k4me2.rds", "k4me3.rds", "k27ac.rds", "k27me3.rds", "k9me3.rds")
plan("multiprocess", workers = 6)
nmats <- pblapply(marks, function(mark) {
obj <- readRDS(paste0("/home/srivastavaa/v8_bingjie_cut_n_tag/seurat_objects/", mark))
nmat <- Signac:::SingleFeatureMatrix(
obj@assays$tiles@fragments[[1]],
qregion,
cells = names(obj$predicted.celltype.l1[obj$predicted.celltype.l1 == celltype])
)
rowSums(nmat)
})
length(nmats)
names(nmats) <- marks
celltype <- "Mono"
mmats <- pblapply(marks, function(mark) {
obj <- readRDS(paste0("/home/srivastavaa/v8_bingjie_cut_n_tag/seurat_objects/", mark))
nmat <- Signac:::SingleFeatureMatrix(
obj@assays$tiles@fragments[[1]],
qregion,
cells = names(obj$predicted.celltype.l1[obj$predicted.celltype.l1 == celltype])
)
rowSums(nmat)
})
length(mmats)
names(mmats) <- marks
# extracting probabilities
GetLikelihood <- function(states, marks, cts, distribution) {
num.states <- length(states)
num.marks <- length(marks)
likelihood <- matrix(0, num.states, num.marks)
for (mark.id in seq(num.marks)) {
mark <- marks[[mark.id]]
if(!(mark %in% names(cts))) {
print("ERROR: can't find mark in cts")
}
region.count <- cts[[mark]]
for (state in seq(num.states)) {
state.dist <- distribution[[mark]][[state]]
if (region.count %in% as.integer(names(state.dist))) {
val <- as.numeric(state.dist[as.character(region.count)])
likelihood[state, mark.id] <- val
}
}
}
colnames(likelihood) <- marks
rownames(likelihood) <- states
likelihood
}
GetPosterior <- function(counts, fit, states, marks) {
regions <- colnames(counts)
probs <- lapply(regions, function(region.id) {
hist.counts <- counts[, region.id]
names(hist.counts) <- rownames(cdt)
likelihood <- GetLikelihood(states, marks, hist.counts, fit)
likelihood <- t(t(likelihood) / rowSums(t(likelihood)))
likelihood <- rowSums(likelihood)
posterior <- likelihood / sum(likelihood)
})
region.probs <- do.call(rbind.data.frame, probs)
colnames(region.probs) <- states
region.probs
}
## processing
cdt <- do.call(rbind.data.frame, nmats)
colnames(cdt) <- seq(dim(cdt)[[2]])
rownames(cdt) <- marks
cdt
mono <- do.call(rbind.data.frame, mmats)
colnames(mono)<- seq(dim(cdt)[[2]])
rownames(mono) <- marks
mono
cdt
mono
marks <- c("k4me1.rds", "k4me2.rds", "k4me3.rds", "k27ac.rds", "k27me3.rds", "k9me3.rds")
m.post <- GetPosterior(mono, fits[["Mono"]], seq(12), marks)
t.post <- GetPosterior(cdt, fits[["CD4_T"]], seq(12), marks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment