Skip to content

Instantly share code, notes, and snippets.

@pfh
Last active May 1, 2024 09:54
Show Gist options
  • Save pfh/e1a51db030d4bf7de20c0ce673bbb2ff to your computer and use it in GitHub Desktop.
Save pfh/e1a51db030d4bf7de20c0ce673bbb2ff to your computer and use it in GitHub Desktop.
Trawling datasets with "recount"
remotes::install_github("MonashBioinformaticsPlatform/varistran")
BiocManager::install("recount")
library(recount)
library(tidyverse)
# Inclusive bounds on the number of samples a study must report.
min_samples <- 9
max_samples <- 12

# Studies from the recount abstract table that also appear in
# recount::recount_url, have an abstract, and report a sample count
# within the bounds above.
candidates <- as_tibble(recount_abstract) |>
  dplyr::filter(
    project %in% recount::recount_url$project,
    number_samples >= min_samples,
    number_samples <= max_samples,
    !is.na(abstract)
  )
# Press enter to see next heatmap
#
# Walk the candidate studies from the last row to the first. For each one:
# print its metadata row, download the gene-level data if it is not already
# on disk, then draw a heatmap and wait on readline() before moving on.
for (i in rev(seq_len(nrow(candidates)))) {
  print(as.list(candidates[i, ]))
  name <- candidates$project[i]

  # Test for the data file itself, not just the study directory, so a
  # directory left behind without rse_gene.Rdata triggers a fresh download
  # instead of a failing load().
  rse_path <- file.path(name, "rse_gene.Rdata")
  if (!file.exists(rse_path)) download_study(name)
  load(rse_path) # expected to define `rse_gene`, which is used below

  # Skip studies whose actual column count falls outside the requested range.
  # Both bounds are inclusive, matching the number_samples filter used to
  # build `candidates` (a study with exactly max_samples columns is kept).
  if (ncol(rse_gene) < min_samples || ncol(rse_gene) > max_samples) {
    cat("Lies!\n\n")
    next
  }

  vmat <- assay(read_counts(rse_gene)) |> varistran::vst()
  # Label rows by gene symbol; entries with several symbols are joined by "/".
  rownames(vmat) <- rowData(rse_gene)$symbol |> map_chr(paste, collapse = "/")

  # Explicit print() is needed because auto-printing is off inside a loop.
  varistran::plot_heatmap(vmat, n = 50) |> print()
  readline()
}
# Some interesting datasets:
# ERP002021 -- 3x4
# SRP067529 -- 3x3
# SRP067469 -- 3x3, outlier and batch effect
# SRP066934 -- 3x3, outlier

# Closer look at one study.
name <- "SRP067469"
dplyr::filter(recount_abstract, project == name) |> as.list()

# Download the study if its data file is not on disk yet, so this section
# also works in a fresh session or when the browsing loop never reached it.
rse_path <- file.path(name, "rse_gene.Rdata")
if (!file.exists(rse_path)) download_study(name)
load(rse_path) # expected to define `rse_gene`, which is used below

vmat <- assay(read_counts(rse_gene)) |> varistran::vst()
# Columns are labelled with the sample titles, rows with gene symbols
# (entries with several symbols are joined by "/").
colnames(vmat) <- colData(rse_gene)$title
rownames(vmat) <- rowData(rse_gene)$symbol |> map_chr(paste, collapse = "/")

varistran::plot_heatmap(vmat, n = 50)
varistran::plot_biplot(vmat, n_features = 5)

# Batch effect is very evident in the two similar conditions
varistran::plot_heatmap(vmat[, 4:9], n = 50)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment