Skip to content

Instantly share code, notes, and snippets.

@lwaldron
Created March 18, 2016 15:59
Show Gist options
  • Save lwaldron/07488d38bce70f354406 to your computer and use it in GitHub Desktop.
Save lwaldron/07488d38bce70f354406 to your computer and use it in GitHub Desktop.
An example of a MultiAssayExperiment setup with sample naming problems
library(MultiAssayExperiment)

# Restore the saved workspace objects. The assay matrices wrapped below
# (rna.arraydat, protein.ms.arraydat, ...) are never created in this script,
# so they presumably come from this file -- TODO confirm.
load("nci60dat.rda")

# Wrap each assay matrix in an ExpressionSet.
rnadat             <- Biobase::ExpressionSet(assayData = rna.arraydat)
proteinMSdat       <- Biobase::ExpressionSet(assayData = protein.ms.arraydat)
deepProteinMSdat   <- Biobase::ExpressionSet(assayData = deep.protein.ms.arraydat)
kinaseProteinMSdat <- Biobase::ExpressionSet(assayData = kinase.protein.ms.arraydat)

# Collect the experiments in one list; the element names are lower-cased.
ExpList <- list(rnadat, proteinMSdat, deepProteinMSdat, kinaseProteinMSdat)
names(ExpList) <- tolower(
  c("rna", "protein MS", "deep protein MS", "kinase protein MS")
)
# Sample map for the RNA assay: the row names of pdat are used both as the
# primary identifier and as the assay identifier.
rnamap <- data.frame(
  primary   = rownames(pdat),
  assay     = rownames(pdat),
  assayname = rep("RNA", nrow(pdat))
)
# Keep only the rows whose assay identifier is not missing.
rnamap <- rnamap[!is.na(rnamap[["assay"]]), ]
# Sample maps for the three MS-based proteomics assays (protein MS, deep
# protein MS, kinase protein MS). All three are built the same way, so the
# construction lives in one helper instead of three copy-pasted sections.

# Return the n-th piece of every element of `x` after splitting it on `split`
# (NA where an element has fewer than `n` pieces).
nthToken <- function(x, split, n) {
  vapply(x, function(s) strsplit(s, split)[[1]][n], character(1),
         USE.NAMES = FALSE)
}

# Build the sample map for one MS assay.
#
#   assayMat        assay matrix; the third "_"-separated piece of each column
#                   name is compared with the cell-line names
#   assayName       label written to the 'assayname' column
#   phenoCellLines  cell-line names derived from the row names of pdat
#
# Returns a data frame with columns 'primary', 'assay' and 'assayname' holding
# one row per assay column whose cell line was found in phenoCellLines.
buildMsSampleMap <- function(assayMat, assayName, phenoCellLines) {
  assayCols <- colnames(assayMat)
  assayCellLines <- nthToken(assayCols, "_", 3)
  # Position of each assay cell line among the pheno cell lines (NA = absent);
  # computed once and reused for both the lookup and the row filter.
  hit <- match(assayCellLines, phenoCellLines)
  # Build the full table first and subset afterwards, so surviving rows keep
  # their original positions as row names.
  map <- data.frame(
    primary   = phenoCellLines[hit],
    assay     = assayCols,
    assayname = rep(assayName, length(assayCellLines))
  )
  map[!is.na(hit), ]
}

# Cell-line names on the pheno side: the piece after ":" in each row name of
# pdat, with underscores removed. Note that 'primary' therefore holds this
# stripped name, not the full row name of pdat.
phenoCellLines <- gsub("_", "", nthToken(rownames(pdat), ":", 2))

# sample map in the MS-based proteomics assay
proteinMSmap <- buildMsSampleMap(
  protein.ms.arraydat, "protein MS", phenoCellLines
)
# sample map in the MS-based deep proteomics assay
deepProteinMSmap <- buildMsSampleMap(
  deep.protein.ms.arraydat, "deep protein MS", phenoCellLines
)
# sample map in the MS-based kinase proteomics assay
kinaseProteinMSmap <- buildMsSampleMap(
  kinase.protein.ms.arraydat, "kinase protein MS", phenoCellLines
)
# Collect the per-assay sample maps, in the same order as ExpList.
listmap <- list(
  rnamap,
  proteinMSmap,
  deepProteinMSmap,
  kinaseProteinMSmap
)
### The following is needed but is easy to forget (note tolower):
# names(listmap) <- tolower(c("RNA", "protein MS", "deep protein MS", "kinase protein MS"))
### Dropping unmatched samples as follows gets you partway there:
# for (i in seq_along(ExpList)) {
#   listmap[[i]] <- listmap[[i]][listmap[[i]][, 1] %in% rownames(pdat), ]
#   ExpList[[i]] <- ExpList[[i]][, colnames(ExpList[[i]]) %in% listmap[[i]][, 2]]
# }
# This would work if listmap were named:
# dfmap <- listToMap(listmap)

# Stack the individual maps row-wise into a single data frame.
dfmap <- do.call(what = rbind, args = listmap)

## This doesn't work because the naming conventions differ between
## rownames(pdat) (which start with a prefix such as "BR:") and dfmap[, 1]
## (some of which are missing that prefix).
## NOTE(review): the argument names Elist/pData may be specific to the
## MultiAssayExperiment version this was written for -- confirm.
myMultiAssay <- MultiAssayExperiment(
  Elist = ExpList,
  pData = pdat,
  sampleMap = dfmap
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment