Skip to content

Instantly share code, notes, and snippets.

@lwaldron
Last active June 22, 2018 13:31
Show Gist options
  • Save lwaldron/63b403803e91b3a3ce72592fa6e85f79 to your computer and use it in GitHub Desktop.
Save lwaldron/63b403803e91b3a3ce72592fa6e85f79 to your computer and use it in GitHub Desktop.
Add ranges to RNA-seq and microarray SummarizedExperiments where rownames are gene symbols in a MultiAssayExperiment
.cMAE <- function(mae, x, name="newelement"){
el <- ExperimentList(tmp=x)
names(el)[1] <- name
c(mae, el)
}
.hasSymbols <- function(x){
mean(c(FALSE, grepl("^[A-Z0-9]{1,6}|^C[0-9]orf[0-9]{1,4}", rownames(x))), na.rm=TRUE) > 0.9
}
.isSummarizedExperiment <- function(x){
is(x, "SummarizedExperiment") & !is(x, "RangedSummarizedExperiment")
}
.makeListRanges <- function(x, gn){
res <- list(unmapped=x[!x %in% names(gn)])
x <- x[x %in% names(gn)]
gn <- gn[match(x, names(gn))]
res$mapped <- gn
return(res)
}
.getRangesOfSYMBOLS <- function(x){
suppressPackageStartupMessages({
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)
library(GenomeInfoDb)
})
entrez <- mapIds(org.Hs.eg.db, x, keytype = "SYMBOL", column = "ENTREZID")
gn <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
names(gn) <- mapIds(org.Hs.eg.db, names(gn), keytype="ENTREZID", column = "SYMBOL")
gn <- keepStandardChromosomes(granges(gn), pruning.mode="coarse")
seqlevelsStyle(gn) <- "NCBI"
return( .makeListRanges(x, gn) )
## returns a list of length 2: "unmapped" is a character vector providing
## unmapped symbols, "mapped" is a GRanges object with ranges of mapped symbols.
}
symbolsToRanges <- function(obj, removeOriginals = TRUE){
## obj: a MultiAssayExperiment
## any SummarizedExperiment elements with gene symbols as rows will have ranges added
## symbols where ranges can't be found are kept as another SummarizedExperiment
can.fix <- vapply(experiments(obj), function(y){
.hasSymbols(y) & .isSummarizedExperiment(y)
}, TRUE)
##
for (i in which(can.fix)){
lookup <- .getRangesOfSYMBOLS(rownames(obj[[i]]))
rse <- obj[[i]][names(lookup$mapped), ]
rowRanges(rse) <- lookup$mapped
obj <- .cMAE(obj, rse, name=paste0(names(obj)[i], "_ranged"))
if(length(lookup$unmapped > 0)){
se <- obj[[i]][lookup$unmapped, ]
obj <- .cMAE(obj, se, name=paste0(names(obj)[i], "_unranged"))
}
}
if(removeOriginals & any(can.fix))
obj <- obj[, , -which(can.fix)]
return(obj)
## example
## symbolsToRanges(miniACC)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment