## lcolladotor/explore_berto_et_al_2019.R

## explore_berto_et_al_2019.R
## Related to https://docs.google.com/document/d/1uDdq0W9eAEnyPLf_gKJRKI8TGpujXIhGhaHBuLdmmRE/edit?usp=sharing
## and Berto et al 2019 https://www.pnas.org/content/116/48/24334

library("SummarizedExperiment")

## Identifiers of the two datasets to load
datasets <- c("NeuN", "OLIG2")

## For each dataset, read its expression and phenotype tables and wrap them in
## a SummarizedExperiment object. Naming the input vector makes lapply()
## return a list that is already named by dataset.
rse_list <- lapply(setNames(datasets, datasets), function(dataid) {
    ## Input files are looked up by dataset id in the working directory
    expr_file <- paste0(dataid, "_Primates_AdjExp.txt")
    pheno_file <- paste0(dataid, "_Primates_pheno.txt")
    expr <- read.table(expr_file)
    sample_info <- read.table(pheno_file)

    ## The row names of the expression table are the only gene-level
    ## information we have, so they are stored as the gene symbol
    SummarizedExperiment(
        assays = list(counts = expr),
        rowData = DataFrame(symbol = rownames(expr)),
        colData = sample_info
    )
})

## Combining the datasets directly can't work because they have a different
## number of genes. The attempt is kept as a demonstration, but the expected
## error is caught and reported so the rest of the script still runs when
## this file is sourced from top to bottom.
tryCatch(
    do.call(cbind, rse_list),
    error = function(e) {
        message("Expected failure combining datasets: ", conditionMessage(e))
    }
)

## Find the unique genes across both datasets
genes <- unique(unlist(lapply(rse_list, rownames)))

## Report how many unique genes there are (instead of printing every name)
length(genes)
# [1] 9037

## Expand every dataset to the merged gene set so that all of them have the
## same rows and can be combined by column
rse_uniform_list <- lapply(rse_list, function(dataset_se) {
    ## Zero-filled matrix covering all merged genes, labelled with the gene
    ## names (rows) and this dataset's sample names (columns)
    padded <- matrix(
        0,
        nrow = length(genes),
        ncol = ncol(dataset_se),
        dimnames = list(genes, colnames(dataset_se))
    )

    ## Rows of the merged gene set that correspond to this dataset's genes
    gene_rows <- match(rownames(dataset_se), genes)

    ## Copy the observed values into those rows; genes that are absent from
    ## this dataset keep their 0s
    padded[gene_rows, ] <- as.matrix(assays(dataset_se)[["counts"]])

    ## Re-wrap the padded matrix, keeping the original sample information
    SummarizedExperiment(
        assays = list(counts = padded),
        rowData = DataFrame(symbol = genes),
        colData = colData(dataset_se)
    )
})

## All datasets now share the same genes, so they can be combined by column
rse <- do.call(cbind, rse_uniform_list)

## Add the "dataset" where each sample comes from: each dataset id is repeated
## once per sample of the corresponding element of rse_uniform_list.
## vapply() guarantees one integer per dataset, unlike sapply(), whose return
## type depends on its input.
rse$Dataset <- rep(datasets, vapply(rse_uniform_list, ncol, integer(1)))

## HumAge is really a categorical variable, so let's treat it as such
rse$HumAge <- as.factor(rse$HumAge)

## Now we can use iSEE to interactively explore the merged data.
## The package is loaded here, right before it is needed, and the merged
## `rse` object built above is passed to iSEE().
library("iSEE")
iSEE(rse)
	## Related to https://docs.google.com/document/d/1uDdq0W9eAEnyPLf_gKJRKI8TGpujXIhGhaHBuLdmmRE/edit?usp=sharing
	## and Berto et al 2019 https://www.pnas.org/content/116/48/24334

	library("SummarizedExperiment")

	## Identifiers of the two datasets to load
	datasets <- c("NeuN", "OLIG2")

	## For each dataset, read its expression and phenotype tables and wrap
	## them in a SummarizedExperiment object. Naming the input vector makes
	## lapply() return a list that is already named by dataset.
	rse_list <- lapply(setNames(datasets, datasets), function(dataid) {
		## Input files are looked up by dataset id in the working directory
		expr_file <- paste0(dataid, "_Primates_AdjExp.txt")
		pheno_file <- paste0(dataid, "_Primates_pheno.txt")
		expr <- read.table(expr_file)
		sample_info <- read.table(pheno_file)

		## The row names of the expression table are the only gene-level
		## information we have, so they are stored as the gene symbol
		SummarizedExperiment(
			assays = list(counts = expr),
			rowData = DataFrame(symbol = rownames(expr)),
			colData = sample_info
		)
	})

	## Combining the datasets directly can't work because they have a
	## different number of genes. The attempt is kept as a demonstration, but
	## the expected error is caught and reported so the rest of the script
	## still runs when this file is sourced from top to bottom.
	tryCatch(
		do.call(cbind, rse_list),
		error = function(e) {
			message("Expected failure combining datasets: ", conditionMessage(e))
		}
	)

	## Find the unique genes across both datasets
	genes <- unique(unlist(lapply(rse_list, rownames)))

	## Report how many unique genes there are (instead of printing every name)
	length(genes)
	# [1] 9037

	## Expand every dataset to the merged gene set so that all of them have
	## the same rows and can be combined by column
	rse_uniform_list <- lapply(rse_list, function(dataset_se) {
		## Zero-filled matrix covering all merged genes, labelled with the
		## gene names (rows) and this dataset's sample names (columns)
		padded <- matrix(
			0,
			nrow = length(genes),
			ncol = ncol(dataset_se),
			dimnames = list(genes, colnames(dataset_se))
		)

		## Rows of the merged gene set that correspond to this dataset's genes
		gene_rows <- match(rownames(dataset_se), genes)

		## Copy the observed values into those rows; genes that are absent
		## from this dataset keep their 0s
		padded[gene_rows, ] <- as.matrix(assays(dataset_se)[["counts"]])

		## Re-wrap the padded matrix, keeping the original sample information
		SummarizedExperiment(
			assays = list(counts = padded),
			rowData = DataFrame(symbol = genes),
			colData = colData(dataset_se)
		)
	})

	## All datasets now share the same genes, so they can be combined by column
	rse <- do.call(cbind, rse_uniform_list)

	## Add the "dataset" where each sample comes from: each dataset id is
	## repeated once per sample of the corresponding element of
	## rse_uniform_list. vapply() guarantees one integer per dataset, unlike
	## sapply(), whose return type depends on its input.
	rse$Dataset <- rep(datasets, vapply(rse_uniform_list, ncol, integer(1)))

	## HumAge is really a categorical variable, so let's treat it as such
	rse$HumAge <- as.factor(rse$HumAge)

	## Now we can use iSEE to interactively explore the merged data.
	## The package is loaded here, right before it is needed, and the merged
	## `rse` object built above is passed to iSEE().
	library("iSEE")
	iSEE(rse)