Skip to content

Instantly share code, notes, and snippets.

@lwaldron
Last active June 21, 2018 23:45
Show Gist options
  • Save lwaldron/47fb0c0bece56f58b762192c24117231 to your computer and use it in GitHub Desktop.
Save lwaldron/47fb0c0bece56f58b762192c24117231 to your computer and use it in GitHub Desktop.
Coerce (and optionally remove) all RaggedExperiments in a curatedTCGAData MAE.
simplifyTCGAData <- function(obj, removeRaggedExperiments=TRUE){
    ## Add gene-level summaries of every RaggedExperiment in `obj`.
    ##
    ## Mutation RaggedExperiments (name contains "Mutation") are converted to a
    ## genes x samples RangedSummarizedExperiment holding 1 where a gene has a
    ## non-silent mutation and 0 for silent or no mutation.
    ## All other RaggedExperiments (segmented copy number) are converted to one
    ## value per gene, using a width-weighted average of "Segment_Mean" when
    ## several segments overlap the gene.
    ##
    ## Arguments:
    ##   obj: object whose experiments() are inspected and to which the
    ##        simplified experiments are appended with c()
    ##        (presumably a curatedTCGAData MultiAssayExperiment).
    ##   removeRaggedExperiments: if TRUE, drop the original RaggedExperiments
    ##        from the result after their simplified versions have been added.
    ##
    ## Value: `obj` with one extra "<name>_simplified" experiment per
    ##        RaggedExperiment.
    ##
    ## NOTE: attaches the annotation packages below as a side effect.
    suppressPackageStartupMessages({
        library(TxDb.Hsapiens.UCSC.hg19.knownGene)
        library(org.Hs.eg.db)
        library(GenomeInfoDb)
    })

    ## Gene ranges on standard chromosomes, NCBI seqlevel style, named by symbol.
    gn <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
    gn <- keepStandardChromosomes(granges(gn), pruning.mode="coarse")
    seqlevelsStyle(gn) <- "NCBI"
    names(gn) <- mapIds(org.Hs.eg.db, names(gn), keytype = "ENTREZID", column = "SYMBOL")
    ## Genes whose Entrez ID has no symbol are dropped from every result.
    keep.rows <- !is.na(names(gn))

    ## Reducer for copy number: weighted average score per query range.
    ## NOTE(review): weights are the widths of `ranges` as handed over by
    ## qreduceAssay(); `qranges` is unused here -- confirm this is the intended
    ## weighting.
    weightedmean <- function(scores, ranges, qranges)
        sum(scores * width(ranges)) / sum(width(ranges))

    ## Reducer for mutations: TRUE if any overlapping variant is not "Silent".
    nonsilent <- function(scores, ranges, qranges)
        any(scores != "Silent")

    isRE <- function(x) vapply(experiments(x), function(y) is(y, "RaggedExperiment"), TRUE)
    isMut <- function(x) grepl("Mutation", names(x))

    ## Wrap a genes x samples matrix (rows parallel to `gn`) as a
    ## SummarizedExperiment. drop=FALSE keeps a matrix even when only a single
    ## sample (or a single gene) is present.
    toGeneSE <- function(mat) {
        rownames(mat) <- names(gn)
        SummarizedExperiment(mat[keep.rows, , drop=FALSE], rowRanges=gn[keep.rows])
    }

    ## Append `se` to `mae` under the name "<name of experiment i>_simplified".
    appendSimplified <- function(mae, se, i) {
        el <- ExperimentList(x=se)
        names(el) <- paste0(names(mae)[i], "_simplified")
        c(mae, el)
    }

    ## Mutations: restrict to RaggedExperiments so that experiments which merely
    ## have "Mutation" in their name (e.g. "*_simplified" results of an earlier
    ## call) are never passed to qreduceAssay().
    for (i in which(isRE(obj) & isMut(obj))){
        mutations <- qreduceAssay(obj[[i]], gn, nonsilent, "Variant_Classification")
        ## genes without any overlapping variant come back as NA: no mutation
        mutations[is.na(mutations)] <- 0
        obj <- appendSimplified(obj, toGeneSE(mutations), i)
    }

    ## Everything else that is ragged is treated as segmented copy number.
    for (i in which(isRE(obj) & !isMut(obj))){
        cn <- suppressWarnings(
            qreduceAssay(obj[[i]], gn, weightedmean, "Segment_Mean")
        )
        obj <- appendSimplified(obj, toGeneSE(cn), i)
    }

    if (removeRaggedExperiments){
        obj <- obj[, , !isRE(obj)]
    }
    obj
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment