Skip to content

Instantly share code, notes, and snippets.

@mikelove
Created February 16, 2022 01:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikelove/00ad7156735c2989dbb53ff2cf96ae99 to your computer and use it in GitHub Desktop.
Save mikelove/00ad7156735c2989dbb53ff2cf96ae99 to your computer and use it in GitHub Desktop.
OSCA examples for plotting counts and allelic counts
library(SingleCellExperiment)
sce <- readRDS("sce.rds")
labels <- readRDS("cellLabels.rds")
sce <- sce[,names(labels)]
colLabels(sce) <- labels
table(labels)
assayNames(sce)
total_count <- rowSums(assay(sce))
hist(log10(total_count+1))
hist(round(total_count[total_count < 10]))
mcols(sce)$total_count <- total_count # save for later in mcols = metadata columns
sce <- sce[mcols(sce)$total_count > 2,] # arbitrary - drop genes with super low counts
# some basic steps for plotting in Bioc
library(scran)
set.seed(1)
clust0 <- quickCluster(sce)
sce <- computeSumFactors(sce, cluster=clust0)
sce <- logNormCounts(sce)
save(sce, file="log_normed_sce.rda")
# PCA
vardf <- modelGeneVar(sce)
top <- getTopHVGs(vardf, n=2000)
set.seed(1)
sce <- fixedPCA(sce, subset.row=top)
library(scater)
# make less labels just for plotting...
sce$label_less <- factor(ifelse(as.numeric(colLabels(sce)) > 8, "else",
as.character(colLabels(sce))))
table(sce$label_less)
plotReducedDim(sce, dimred="PCA", colour_by="label_less")
# UMAP
set.seed(1)
sce <- runUMAP(sce, dimred="PCA")
plotReducedDim(sce, dimred="UMAP", colour_by="label_less")
# look at simple plots
assay(sce, "allele") <- assay(sce, "ratio") * assay(sce, "counts")
# look at bulk ratios
mcols(sce)$bulk_ratio <- rowSums(assay(sce, "allele")) / mcols(sce)$total_count
myhist <- function(x, brks, min_count) {
# idx could instead by the names of housekeeping genes
idx <- mcols(sce)$total_count >= min_count
with(mcols(sce)[idx,],
hist(bulk_ratio, breaks=brks, col="grey50", border="white"))
}
myhist(sce, 100, 0)
myhist(sce, 100, 10)
myhist(sce, 100, 20)
library(ggplot2)
dat <- as.data.frame(mcols(sce))
ggplot(dat, aes(total_count, bulk_ratio)) +
geom_point(alpha=0.3) +
geom_hline(yintercept=0.5,col="red") +
scale_x_log10()
library(dplyr)
dat %>% filter(total_count > 1e4, abs(bulk_ratio - 0.5) < .05)
plotReducedDim(sce, dimred="UMAP", colour_by="chinmo")
# but want to find a gene with expression in most clusters
# compute cluster-level total counts and allelic counts
for (lvl in levels(sce$label_less)) {
idx <- sce$label_less %in% lvl
clst_total <- rowSums(assay(sce)[,idx])
clst_allele <- rowSums(assay(sce, "allele")[,idx])
mcols(sce)[[paste0("clst_total_",lvl)]] <- clst_total
mcols(sce)[[paste0("clst_ratio_",lvl)]] <- clst_allele / clst_total
}
# do some logic on the cluster total counts
library(tibble)
dat <- as.data.frame(mcols(sce)) %>%
rownames_to_column("gene") %>%
tibble()
# genes where each cluster has a count of 1000 or more...
dat %>% filter_at(vars(starts_with("clst_total")), all_vars(. >= 1e3))
plotReducedDim(sce, dimred="UMAP", colour_by="label_less")
plotReducedDim(sce, dimred="UMAP", colour_by="hth")
plotReducedDim(sce, dimred="UMAP", colour_by="hth", by_exprs_values="ratio")
ggplot(dat, aes(clst_total_4, clst_ratio_4)) +
geom_point(alpha=0.3) +
geom_hline(yintercept=0.5,col="red") +
scale_x_log10()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment