Skip to content

Instantly share code, notes, and snippets.

@const-ae
Created May 11, 2020 14:06
Show Gist options
  • Save const-ae/82c540d2a2cb53325512994400d1a3bc to your computer and use it in GitHub Desktop.
Save const-ae/82c540d2a2cb53325512994400d1a3bc to your computer and use it in GitHub Desktop.
Combine two SummarizedExperiments / SingleCellExperiments with partially overlapping genes into one. LICENCE: MIT
#' Combine two SummarizedExperiments / SingleCellExperiments with partially overlapping
#' genes into one
#'
#' Note: This ignores all but the first assay
#' Can handle column sparse and standard matrices
cbind_se <- function(se1, se2, assay_fill = 0, colData_fill = NA, rowData_fill = NA, post_fix = c("_1", "_2")){
stopifnot(length(post_fix) == 2)
# Merge assays based on rowNames
if( (is.null(rownames(se1)) || is.null(rownames(se2))) && nrow(se1) != nrow(se2)){
stop("To cbind SummarizedExperiments either rownames are needed or the ",
"number of rows of se1 and se2 must match.")
}else if( (is.null(rownames(se1)) || is.null(rownames(se2))) && nrow(se1) == nrow(se2)){
return(cbind(assay(se1), assay(se2)))
}
if(is.null(colnames(se1))) colnames(se1) <- seq_len(ncol(se1))
if(is.null(colnames(se2))) colnames(se2) <- seq_len(ncol(se2))
if(is.null(rownames(se1))) rownames(se1) <- seq_len(nrow(se1))
if(is.null(rownames(se2))) rownames(se2) <- seq_len(nrow(se2))
all_rownames <- union(rownames(se1), rownames(se2))
if(is(assay(se1), "dgCMatrix") && is(assay(se2), "dgCMatrix")){
new_assay_mat <- as(Matrix::spMatrix(nrow = length(all_rownames), ncol = ncol(se1) + ncol(se2)), "dgCMatrix")
}else if(is.matrix(se1) && is.matrix(se2)){
new_assay_mat <- matrix(assay_fill, nrow = length(all_rownames), ncol = ncol(se1) + ncol(se2))
}else{
stop("assays must be of same class")
}
rownames(new_assay_mat) <- all_rownames
se1_new_colnames <- rep_len(paste0(colnames(se1), post_fix[1]), ncol(se1))
se2_new_colnames <- rep_len(paste0(colnames(se2), post_fix[2]), ncol(se2))
colnames(new_assay_mat) <- c(se1_new_colnames, se2_new_colnames)
new_assay_mat[rownames(se1), seq_len(ncol(se1))] <- assay(se1)
new_assay_mat[rownames(se2), ncol(se1) + seq_len(ncol(se2))] <- assay(se2)
# Merge rowData
## Convert factors to characters
if(ncol(rowData(se1)) > 0){
rowData(se1)[,sapply(rowData(se1), is.factor)] <- lapply(rowData(se1)[,sapply(rowData(se1), is.factor),drop=FALSE], as.character)
}
if(ncol(rowData(se2)) > 0){
rowData(se2)[,sapply(rowData(se2), is.factor)] <- lapply(rowData(se2)[,sapply(rowData(se2), is.factor),drop=FALSE], as.character)
}
row_data_colnames <- union(colnames(rowData(se1)), colnames(rowData(se2)))
if(length(row_data_colnames) > 0){
new_row_data <- as.data.frame(setNames(lapply(row_data_colnames, function(n) rep(rowData_fill, nrow(new_assay_mat))), row_data_colnames))
rownames(new_row_data) <- all_rownames
new_row_data [rownames(se1), colnames(rowData(se1))] <- as.data.frame(rowData(se1))
new_row_data [rownames(se2), colnames(rowData(se2))] <- as.data.frame(rowData(se2))
}else{
new_row_data <- NULL
}
# Merge colData
## Convert factors to characters
if(ncol(colData(se1)) > 0){
colData(se1)[,sapply(colData(se1), is.factor)] <- lapply(colData(se1)[,sapply(colData(se1), is.factor),drop=FALSE], as.character)
}
if(ncol(colData(se2)) > 0){
colData(se2)[,sapply(colData(se2), is.factor)] <- lapply(colData(se2)[,sapply(colData(se2), is.factor),drop=FALSE], as.character)
}
col_data_colnames <- union(colnames(colData(se1)), colnames(colData(se2)))
if(length(col_data_colnames) > 0){
new_col_data <- as.data.frame(setNames(lapply(col_data_colnames, function(n) rep(colData_fill, ncol(new_assay_mat))), col_data_colnames))
rownames(new_col_data) <- c(se1_new_colnames, se2_new_colnames)
new_col_data [se1_new_colnames, colnames(colData(se1))] <- as.data.frame(colData(se1))
new_col_data [se2_new_colnames, colnames(colData(se2))] <- as.data.frame(colData(se2))
}else{
new_col_data <- NULL
}
new_row_data$BeforeMerge <- dplyr::case_when(rownames(new_assay_mat) %in% rownames(se1) &
rownames(new_assay_mat) %in% rownames(se2) ~ paste0(post_fix[1], "-", post_fix[2]),
rownames(new_assay_mat) %in% rownames(se1) ~ post_fix[1],
rownames(new_assay_mat) %in% rownames(se2) ~ post_fix[2],
TRUE ~ "")
new_col_data$BeforeMerge <- c(rep(post_fix[1], ncol(se1)), rep(post_fix[2], ncol(se2)))
SummarizedExperiment(setNames(list(new_assay_mat), assayNames(se1)),
colData = new_col_data, rowData = new_row_data)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment