Skip to content

Instantly share code, notes, and snippets.

@kieranrcampbell
Created June 15, 2017 09:55
Show Gist options
  • Save kieranrcampbell/310cbf61c65242cda7322ba5246ec56e to your computer and use it in GitHub Desktop.
Save kieranrcampbell/310cbf61c65242cda7322ba5246ec56e to your computer and use it in GitHub Desktop.
Brief example of scran's method for "Accounting for technical noise in single-cell RNA-seq experiments"
library(stringr)
library(ggplot2)
library(scran)
library(scater)
library(dplyr)
# Step 1: do the overdispersion analysis
sc2 <- sce[rowMeans(round(exprs(sce)) > 0) > 0, ] # Keep only genes that are expressed
is_ercc <- grepl("NA_ERCC", featureNames(sc2)) # Your ERCCs might be named differently to mine!
count_mat <- counts(sc2)
tc <- technicalCV2(count_mat, is_ercc)
tc <- mutate(tc, feature = rownames(tc)) %>% tbl_df()
tc <- mutate(tc, is_overdisp = str_to_title(as.character(FDR < 0.05)),
gene = fData(sc2)$ensembl_gene_id)
ggplot(mapping = aes(x = mean, y = cv2)) +
geom_point(data = filter(tc, !is_ercc), aes(color = is_overdisp),
size = 1, alpha = 0.5) +
scale_x_log10() + geom_line(data = tc, mapping = aes(y = trend), color = 'black') +
scale_y_log10(limits = c(NA, max(tc$cv2, na.rm = TRUE))) +
cowplot::theme_cowplot() +
scale_color_brewer(palette = "Set2", name = "Overdispersed") +
#geom_point(data = filter(tc, is_ercc), color = "black",
# size = 1, alpha = 0.5) +
xlab(expression(mu)) + ylab(expression(CV^2)) +
theme_cowplot(font_size = 11) +
theme(legend.position = "top")
# Step 2: GO analysis on overdispersed genes
#' @param genes The genes "differentially expressed" or in the condition
#' @param all_genes The background gene set
go_analysis <- function(genes, all_genes, test_cats = "GO:BP") {
library(goseq)
all_genes <- all_genes[!is.na(all_genes)]
all_genes <- unique(all_genes) # remove duplicates
lgl <- 1 * (all_genes %in% genes)
names(lgl) <- all_genes
pwf <- nullp(lgl, "hg19", "ensGene")
go <- goseq(pwf, "hg19", "ensGene", test.cats = test_cats)
return(go)
}
all_genes <- tc$gene
od_genes <- filter(tc, is_overdisp == "True") %>% .$gene
go <- go_analysis(od_genes, all_genes)
go$q_value <- p.adjust(go$over_represented_pvalue, method = "BH")
# Step 3: A publication-quality plot of top 6 go terms
go_top <- head(go, n = 6)
go_top$term <- str_to_title(go_top$term)
terms_sorted <- arrange(go_top, desc(q_value)) %>% .$term
go_top$term <- factor(go_top$term, levels = terms_sorted)
ggplot(go_top, aes(x = term, y = -log10(q_value))) +
geom_point() +
coord_flip() +
theme(axis.title.y = element_blank(), legend.position = "none") +
ylab(expression(paste("-", log[10], " q-value"))) +
theme(#legend.position = c(-.2, -.14), legend.direction = "horizontal",
legend.position = c(0, 0), legend.direction = "horizontal",
axis.text = element_text(size = 8),
axis.title = element_text(size = 10),
legend.title = element_text(size = 11),
legend.text = element_text(size = 10))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment