Skip to content

Instantly share code, notes, and snippets.

@pavlin-policar
Last active September 16, 2019 13:26
Show Gist options
  • Save pavlin-policar/723acdf47edfb3428f5ec6f15c2b23db to your computer and use it in GitHub Desktop.
Save pavlin-policar/723acdf47edfb3428f5ec6f15c2b23db to your computer and use it in GitHub Desktop.
Utility functions to read/write H5AD files from R
read.h5ad = function (fname) {
#' Read an H5AD file
#'
#' @param fname str: The path to the H5AD file
library(reticulate)
library(Matrix)
ad = import("anndata", convert = FALSE)
sp = import("scipy.sparse", convert = FALSE)
# Extract the file name from the file path and use that as the project name
project_name = tools::file_path_sans_ext(basename(fname))
data = ad$read_h5ad(fname)
if (py_to_r(sp$issparse(data$X))) {
# Convert to CSC format
data$X = data$X$tocsc()
# Read in the matrix where rows are cells and columns are genes
x = sparseMatrix(
i = as.numeric(x = data$X$indices),
p = as.numeric(x = data$X$indptr),
x = as.numeric(x = data$X$data),
dims = c(py_to_r(data$X$shape[0]), py_to_r(data$X$shape[1])),
index1 = F
)
} else {
x = py_to_r(data$X)
}
# We've got to assign cell and gene names onto the x matrix
colnames(x) = py_to_r(data$var_names$values)
rownames(x) = py_to_r(data$obs_names$values)
# Read in gene/cell metadata
obs_data = py_to_r(data$obs)
var_data = py_to_r(data$var)
# Read in unstructed metadata
uns_data = py_to_r(data$uns)
return(list(name = project_name, x = x, cell_data = obs_data,
gene_data = var_data, metadata = uns_data))
}
write.h5ad = function(fname, x, cell_data = NULL, gene_data = NULL, cell_matrices = NULL, gene_matrices = NULL, metadata = NULL) {
#' Write an expression matrix into an H5AD file
#'
#' @param x matrix: An expression matrix with cells in rows and genes in columns
#' @param cell_data data.frame: Contains a row for each cell in the expression matrix
#' @param gene_data data.frame: Contains a row for each gene in the expression matrix
#' @param cell_matrices list of matrices
#' @param gene_matrices list of matrices
#' @param metadata list: Additional metadata such as method parameters
library(reticulate)
ad = import("anndata", convert = FALSE)
# Constuct a python instance of AnnData
adata = ad$AnnData(x)
# Add cell data if exists
if (!is.null(cell_data)) {
if (dim(cell_data)[1] != dim(x)[1]) {
stop("Please ensure that `cell_data` contains the same number of rows as there are cells.")
}
# If the data frame is empty, we can't add it
if (dim(cell_data)[2] > 0) {
adata$obs = cell_data
}
}
# Add gene data if exists
if (!is.null(gene_data)) {
if (dim(gene_data)[1] != dim(x)[2]) {
stop("Please ensure that `gene_data` contains the same number of rows as there are genes.")
}
# If the data frame is empty, we can't add it
if (dim(gene_data)[2] > 0) {
adata$var = gene_data
}
}
# Add cell matrices if exists
if (!is.null(cell_matrices)) {
for (key in names(cell_matrices)) {
matrix = cell_matrices[[key]]
if (dim(matrix)[1] != dim(x)[1]) {
stop("Please ensure that all `cell_matrices` contains the same number of rows as there are cells.")
}
# If the data frame is empty, we can't add it
if (dim(matrix)[2] > 0) {
tmp = list()
tmp[[key]] = as.matrix(matrix)
adata$obsm$update(tmp)
}
}
}
# Add gene matrices if exists
if (!is.null(gene_matrices)) {
for (key in names(gene_matrices)) {
matrix = gene_matrices[[key]]
if (dim(matrix)[1] != dim(x)[2]) {
stop("Please ensure that all `gene_matrices` contains the same number of rows as there are cells.")
}
# If the data frame is empty, we can't add it
if (dim(matrix)[2] > 0) {
tmp = list()
tmp[[key]] = as.matrix(matrix)
adata$varm$update(tmp)
}
}
}
# Add metadata if exists
if (!is.null(metadata)) {
adata$uns$update(metadata)
}
adata$write_h5ad(fname)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment