Skip to content

Instantly share code, notes, and snippets.

@ATpoint
Last active April 19, 2021 23:53
Show Gist options
  • Save ATpoint/44f9727e452e42c72a0bf72663708dd4 to your computer and use it in GitHub Desktop.
Save ATpoint/44f9727e452e42c72a0bf72663708dd4 to your computer and use it in GitHub Desktop.
library(recount)
library(edgeR)
#/ Get the counts from GTEx via recount as a SummarizedExperiment
#/ => 1.3GB file
options(timeout=600)
download_study("SRP012682", type = "rse-gene")
load(file.path("SRP012682", "rse_gene.Rdata"))
#/ remove whitespaces in tissue names:
rse_gene$tissue <- gsub(" ", "_", rse_gene$smts)
#/ see the tissues included:
table(rse_gene$tissue)
#/ make DGEList
y <- DGEList(counts = assay(rse_gene, "counts"),
group = rse_gene$tissue)
#/ Subset the entire dataset to three tissues to make the process faster and less
#/ memory intensive, for the sake of this post:
y <- y[,y$samples$group %in% c("Heart", "Lung", "Pancreas")]
#/ filter lowly-expressed genes:
y <- y[filterByExpr(y, group=y$samples$group),]
#/ naive CPMs, corrected only for library size:
cpm_by_group_naive <- edgeR::cpmByGroup(y, log=TRUE)
#/ and with the TMMs, corrected for composition:
y <- calcNormFactors(y)
cpm_by_group_TMM <- edgeR::cpmByGroup(y, log=TRUE)
#/ Lets look at lung vs pancreas:
naive <- cpm_by_group_naive[,c("Lung", "Pancreas")]
TMM <- cpm_by_group_TMM[,c("Lung", "Pancreas")]
#/ Make the MA-plot using smoothScatter:
par(mfrow=c(1,2))
for(i in c("naive", "TMM")){
g1 <- get(i)[,1]
g2 <- get(i)[,2]
smoothScatter(x = 0.5 * (g1+g2), # average expr
y = g1-g2, # fold change
xlab = "average log expression",
ylab = "log fold change",
main = i,
ylim=c(-6,6))
abline(h=0, col="red")
abline(h=log2(2), col="red", lty=2)
abline(h=-log2(2), col="red", lty=2)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment