## lwaldron/MAE_notquite.r

## MAE_notquite.r
library(MultiAssayExperiment)
load('nci60dat.rda')


# Wrap each assay data object in a Biobase ExpressionSet (the objects are
# presumably provided by the load() call above).
rnadat <- Biobase::ExpressionSet(assayData = rna.arraydat)
proteinMSdat <- Biobase::ExpressionSet(assayData = protein.ms.arraydat)
deepProteinMSdat <- Biobase::ExpressionSet(
  assayData = deep.protein.ms.arraydat
)
kinaseProteinMSdat <- Biobase::ExpressionSet(
  assayData = kinase.protein.ms.arraydat
)

# Gather the four ExpressionSets into one list whose names are the
# lower-cased assay labels.
ExpList <- list(rnadat, proteinMSdat, deepProteinMSdat, kinaseProteinMSdat)
names(ExpList) <- tolower(
  c("rna", "protein MS", "deep protein MS", "kinase protein MS")
)

# Sample map for the RNA assay: each row name of pdat is used as both the
# primary identifier and the assay identifier, labelled "RNA".
rnamap <- as.data.frame(
  cbind(rownames(pdat), rownames(pdat), rep("RNA", nrow(pdat)))
)
names(rnamap) <- c("primary", "assay", "assayname")
# Keep only the rows whose assay identifier is not NA.
rnamap <- rnamap[!is.na(rnamap[["assay"]]), ]

# Sample maps for the three MS-based proteomics assays (protein MS, deep
# protein MS and kinase protein MS). All three are built the same way, so the
# shared logic lives in buildMsSampleMap().

# Build the sample map for one MS-based proteomics assay.
#
# assayIds:   column names of the assay data.
# cellLines:  cell-line names to match against (derived from rownames(pdat)).
# assayLabel: value stored in the "assayname" column.
#
# Returns a data frame with columns primary, assay and assayname, restricted
# to the assay columns whose cell line (third "_"-separated field of the
# column name) is found in cellLines.
buildMsSampleMap <- function(assayIds, cellLines, assayLabel) {
  assayCellLines <- vapply(
    assayIds,
    function(id) strsplit(id, "_")[[1]][3],
    character(1),
    USE.NAMES = FALSE
  )
  # Look each assay cell line up once and reuse the result for both the
  # primary column and the row filter.
  hit <- match(assayCellLines, cellLines)
  sampleMap <- as.data.frame(cbind(
    cellLines[hit],
    assayIds,
    rep(assayLabel, length(assayCellLines))
  ))
  colnames(sampleMap) <- c("primary", "assay", "assayname")
  # NOTE(review): "primary" holds the prefix-stripped cell-line name, not the
  # full row name of pdat.
  sampleMap[!is.na(hit), ]
}

# Cell-line names from rownames(pdat): the second ":"-separated field of each
# row name with every "_" removed. Computed once and shared by all three maps.
phenoCellLines <- gsub(
  "_", "",
  vapply(
    rownames(pdat),
    function(id) strsplit(id, ":")[[1]][2],
    character(1),
    USE.NAMES = FALSE
  )
)

# sample map in the MS-based proteomics assay
proteinMSmap <- buildMsSampleMap(
  colnames(protein.ms.arraydat), phenoCellLines, "protein MS"
)

# sample map in the MS-based deep proteomics assay
deepProteinMSmap <- buildMsSampleMap(
  colnames(deep.protein.ms.arraydat), phenoCellLines, "deep protein MS"
)

# sample map in the MS-based kinase proteomics assay
kinaseProteinMSmap <- buildMsSampleMap(
  colnames(kinase.protein.ms.arraydat), phenoCellLines, "kinase protein MS"
)

# Collect the per-assay sample maps in one unnamed list, RNA first.
listmap <- list(rnamap, proteinMSmap, deepProteinMSmap, kinaseProteinMSmap)

### The following naming step is needed but is easy to forget (note the
### tolower):
# names(listmap) <- tolower(c("RNA", "protein MS", "deep protein MS", "kinase protein MS"))

### Dropping samples as in the following loop gets you partway there:
# for (i in 1:length(ExpList)){
#     listmap[[i]] <- listmap[[i]][listmap[[i]][, 1] %in% rownames(pdat), ]
#     ExpList[[i]] <- ExpList[[i]][, colnames(ExpList[[i]]) %in% listmap[[i]][, 2]]
# }

# This would work if listmap were named:
# dfmap <- listToMap(listmap)
# Instead, stack the four maps row-wise into a single data frame.
dfmap <- do.call(what = "rbind", args = listmap)


## This doesn't work because of different naming conventions between
## rownames(pdat) (which start with e.g. "BR:") and dfmap[, 1] (some of
## which are missing the "BR:").
## NOTE(review): `Elist` and `pData` look like argument names from an early
## MultiAssayExperiment release; newer releases appear to use `experiments`
## and `colData` -- confirm against the installed version.
myMultiAssay <- MultiAssayExperiment(
  Elist = ExpList,
  pData = pdat,
  sampleMap = dfmap
)
	library(MultiAssayExperiment)
	load('nci60dat.rda')


	# Wrap each assay data object in a Biobase ExpressionSet (the objects are
	# presumably provided by the load() call above).
	rnadat <- Biobase::ExpressionSet(assayData = rna.arraydat)
	proteinMSdat <- Biobase::ExpressionSet(assayData = protein.ms.arraydat)
	deepProteinMSdat <- Biobase::ExpressionSet(
	  assayData = deep.protein.ms.arraydat
	)
	kinaseProteinMSdat <- Biobase::ExpressionSet(
	  assayData = kinase.protein.ms.arraydat
	)

	# Gather the four ExpressionSets into one list whose names are the
	# lower-cased assay labels.
	ExpList <- list(rnadat, proteinMSdat, deepProteinMSdat, kinaseProteinMSdat)
	names(ExpList) <- tolower(
	  c("rna", "protein MS", "deep protein MS", "kinase protein MS")
	)

	# Sample map for the RNA assay: each row name of pdat is used as both the
	# primary identifier and the assay identifier, labelled "RNA".
	rnamap <- as.data.frame(
	  cbind(rownames(pdat), rownames(pdat), rep("RNA", nrow(pdat)))
	)
	names(rnamap) <- c("primary", "assay", "assayname")
	# Keep only the rows whose assay identifier is not NA.
	rnamap <- rnamap[!is.na(rnamap[["assay"]]), ]

	# Sample maps for the three MS-based proteomics assays (protein MS, deep
	# protein MS and kinase protein MS). All three are built the same way, so
	# the shared logic lives in buildMsSampleMap().

	# Build the sample map for one MS-based proteomics assay.
	#
	# assayIds:   column names of the assay data.
	# cellLines:  cell-line names to match against (derived from rownames(pdat)).
	# assayLabel: value stored in the "assayname" column.
	#
	# Returns a data frame with columns primary, assay and assayname, restricted
	# to the assay columns whose cell line (third "_"-separated field of the
	# column name) is found in cellLines.
	buildMsSampleMap <- function(assayIds, cellLines, assayLabel) {
	  assayCellLines <- vapply(
	    assayIds,
	    function(id) strsplit(id, "_")[[1]][3],
	    character(1),
	    USE.NAMES = FALSE
	  )
	  # Look each assay cell line up once and reuse the result for both the
	  # primary column and the row filter.
	  hit <- match(assayCellLines, cellLines)
	  sampleMap <- as.data.frame(cbind(
	    cellLines[hit],
	    assayIds,
	    rep(assayLabel, length(assayCellLines))
	  ))
	  colnames(sampleMap) <- c("primary", "assay", "assayname")
	  # NOTE(review): "primary" holds the prefix-stripped cell-line name, not
	  # the full row name of pdat.
	  sampleMap[!is.na(hit), ]
	}

	# Cell-line names from rownames(pdat): the second ":"-separated field of
	# each row name with every "_" removed. Computed once and shared by all
	# three maps.
	phenoCellLines <- gsub(
	  "_", "",
	  vapply(
	    rownames(pdat),
	    function(id) strsplit(id, ":")[[1]][2],
	    character(1),
	    USE.NAMES = FALSE
	  )
	)

	# sample map in the MS-based proteomics assay
	proteinMSmap <- buildMsSampleMap(
	  colnames(protein.ms.arraydat), phenoCellLines, "protein MS"
	)

	# sample map in the MS-based deep proteomics assay
	deepProteinMSmap <- buildMsSampleMap(
	  colnames(deep.protein.ms.arraydat), phenoCellLines, "deep protein MS"
	)

	# sample map in the MS-based kinase proteomics assay
	kinaseProteinMSmap <- buildMsSampleMap(
	  colnames(kinase.protein.ms.arraydat), phenoCellLines, "kinase protein MS"
	)

	# Collect the per-assay sample maps in one unnamed list, RNA first.
	listmap <- list(rnamap, proteinMSmap, deepProteinMSmap, kinaseProteinMSmap)

	### The following naming step is needed but is easy to forget (note the
	### tolower):
	# names(listmap) <- tolower(c("RNA", "protein MS", "deep protein MS", "kinase protein MS"))

	### Dropping samples as in the following loop gets you partway there:
	# for (i in 1:length(ExpList)){
	# listmap[[i]] <- listmap[[i]][listmap[[i]][, 1] %in% rownames(pdat), ]
	# ExpList[[i]] <- ExpList[[i]][, colnames(ExpList[[i]]) %in% listmap[[i]][, 2]]
	# }

	# This would work if listmap were named:
	# dfmap <- listToMap(listmap)
	# Instead, stack the four maps row-wise into a single data frame.
	dfmap <- do.call(what = "rbind", args = listmap)


	## This doesn't work because of different naming conventions between
	## rownames(pdat) (which start with e.g. "BR:") and dfmap[, 1] (some of
	## which are missing the "BR:").
	## NOTE(review): `Elist` and `pData` look like argument names from an early
	## MultiAssayExperiment release; newer releases appear to use `experiments`
	## and `colData` -- confirm against the installed version.
	myMultiAssay <- MultiAssayExperiment(
	  Elist = ExpList,
	  pData = pdat,
	  sampleMap = dfmap
	)