Skip to content

Instantly share code, notes, and snippets.

@tomsing1
Last active June 10, 2025 18:51
Show Gist options
  • Save tomsing1/41d30dda3e7dc60ad86c3e46287ce4a6 to your computer and use it in GitHub Desktop.
Save tomsing1/41d30dda3e7dc60ad86c3e46287ce4a6 to your computer and use it in GitHub Desktop.
Pseudobulk aggregation, normalization & centering of single-cell RNA-seq counts with the edgeR R package
library(edgeR)
library(Seurat)
# download example Seurat object (2000 genes, 10000 cells)
object <- local({
temp_file <- tempfile(".rds")
download.file("https://bioinf.wehi.edu.au/edgeR/UserGuideData/SeuratObj.rds",
temp_file)
readRDS(temp_file)
})
# There are 13 samples
table(object@meta.data$group)
# with cells assigned to 7 clusters
table(object@meta.data$seurat_clusters)
# let's define three arbitrary samples as the control group
ctrl <- c("N_0019_total", "N_0021_total", "N_0064_total")
##------ Using edgeR
# aggreate into pseudo-bulk counts by group and cluster
y <- Seurat2PB(object, sample="group", cluster="seurat_clusters")
# Seurat's group info is now in the `sample` column
table(y$samples$sample)
y$samples$group <- factor(
ifelse(y$samples$sample %in% ctrl, "Ctrl", "Treatment")
)
# TMM normalization
y <- normLibSizes(y)
# normalized counts
cpms <- cpm(y, normalized.lib.sizes = TRUE, log = TRUE)
# subtract the mean of the control samples (within each cluster) from each gene
centred_cpms <- lapply(unique(y$samples$cluster), \(n) {
ctrl_group <- y$samples$group == "Ctrl"
this_cluster <- y$samples$cluster == n
cpms_ctrl <- rowMeans(cpms[, which(ctrl_group & this_cluster)])
sweep(cpms[, which(this_cluster)], MARGIN = 1, STATS = cpms_ctrl, FUN = "-")
})
# Concatenate the matrices (one for each cluster) into a single matrix
centred_cpms <- do.call(cbind, centred_cpms)
# Restore the original column ordering
centred_cpms <- centred_cpms[, colnames(y)]
dim(centred_cpms) # 13527 genes, 85 pseudo-bulk samples
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment