Last active
June 10, 2025 18:51
-
-
Save tomsing1/41d30dda3e7dc60ad86c3e46287ce4a6 to your computer and use it in GitHub Desktop.
Pseudobulk aggregation, normalization & centering of single-cell RNA-seq counts with the edgeR R package
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(edgeR) | |
library(Seurat) | |
# download example Seurat object (2000 genes, 10000 cells) | |
object <- local({ | |
temp_file <- tempfile(".rds") | |
download.file("https://bioinf.wehi.edu.au/edgeR/UserGuideData/SeuratObj.rds", | |
temp_file) | |
readRDS(temp_file) | |
}) | |
# There are 13 samples | |
table(object@meta.data$group) | |
# with cells assigned to 7 clusters | |
table(object@meta.data$seurat_clusters) | |
# let's define three arbitrary samples as the control group | |
ctrl <- c("N_0019_total", "N_0021_total", "N_0064_total") | |
##------ Using edgeR | |
# aggreate into pseudo-bulk counts by group and cluster | |
y <- Seurat2PB(object, sample="group", cluster="seurat_clusters") | |
# Seurat's group info is now in the `sample` column | |
table(y$samples$sample) | |
y$samples$group <- factor( | |
ifelse(y$samples$sample %in% ctrl, "Ctrl", "Treatment") | |
) | |
# TMM normalization | |
y <- normLibSizes(y) | |
# normalized counts | |
cpms <- cpm(y, normalized.lib.sizes = TRUE, log = TRUE) | |
# subtract the mean of the control samples (within each cluster) from each gene | |
centred_cpms <- lapply(unique(y$samples$cluster), \(n) { | |
ctrl_group <- y$samples$group == "Ctrl" | |
this_cluster <- y$samples$cluster == n | |
cpms_ctrl <- rowMeans(cpms[, which(ctrl_group & this_cluster)]) | |
sweep(cpms[, which(this_cluster)], MARGIN = 1, STATS = cpms_ctrl, FUN = "-") | |
}) | |
# Concatenate the matrices (one for each cluster) into a single matrix | |
centred_cpms <- do.call(cbind, centred_cpms) | |
# Restore the original column ordering | |
centred_cpms <- centred_cpms[, colnames(y)] | |
dim(centred_cpms) # 13527 genes, 85 pseudo-bulk samples | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment