Skip to content

Instantly share code, notes, and snippets.

@lcolladotor
Last active February 12, 2021 19:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lcolladotor/da5ec764b522789cdee9da79f231a40a to your computer and use it in GitHub Desktop.
Save lcolladotor/da5ec764b522789cdee9da79f231a40a to your computer and use it in GitHub Desktop.
## Related to https://docs.google.com/document/d/1uDdq0W9eAEnyPLf_gKJRKI8TGpujXIhGhaHBuLdmmRE/edit?usp=sharing
## and Berto et al 2019 https://www.pnas.org/content/116/48/24334
library("SummarizedExperiment")
## The two data sets we'll use
datasets <- c("NeuN", "OLIG2")
## Read in data for both datasets and build a SummarizedExperiment object
rse_list <- lapply(datasets, function(dataid) {
## Read in the data
counts <- read.table(paste0(dataid, "_Primates_AdjExp.txt"))
pheno <- read.table(paste0(dataid, "_Primates_pheno.txt"))
## Make a table for the gene information we have
gene_info <- DataFrame(symbol = rownames(counts))
## Build a RangedSummarizedExperiment object
SummarizedExperiment(
assays = list(counts = counts),
rowData = gene_info,
colData = pheno
)
})
names(rse_list) <- datasets
## Can't run due to different number of genes
do.call(cbind, rse_list)
## Find the unique genes across both datasets
genes <- unique(unlist(lapply(rse_list, rownames)))
genes
# [1] 9037
## Make it such that we have the same number of genes in each dataset
rse_uniform_list <- lapply(rse_list, function(rse) {
## Make an empty large matrix
new_mat <- matrix(0, nrow = length(genes), ncol = ncol(rse))
## Add the gene and sample names
rownames(new_mat) <- genes
colnames(new_mat) <- colnames(rse)
## Find the position for our current genes among the merged set of genes
m <- match(rownames(rse), genes)
## Replace the 0s in the matrix with the actual counts/data we have
new_mat[m, ] <- as.matrix(assays(rse)$counts)
## Make a new SummarizedExperiment object
SummarizedExperiment(
assays = list(counts = new_mat),
rowData = DataFrame(symbol = genes),
colData = colData(rse)
)
})
## We can now combine them into one
rse <- do.call(cbind, rse_uniform_list)
## Add the "dataset" where each sample comes from
rse$Dataset <- rep(datasets, sapply(rse_uniform_list, ncol))
## HumAge is really a categorical variable, so let's treat it as such
rse$HumAge <- as.factor(rse$HumAge)
## Now we can use iSEE to interactively explore the merged data
library("iSEE")
iSEE(rse)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment