Skip to content

Instantly share code, notes, and snippets.

@danmackinlay
Created February 1, 2015 01:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danmackinlay/9e89e65bfc58fded46ae to your computer and use it in GitHub Desktop.
Save danmackinlay/9e89e65bfc58fded46ae to your computer and use it in GitHub Desktop.
Pass sparse matrices between R and Python
"""
input and output some tricky matrix data in a cross-language format
"""
from scipy.sparse import coo_matrix, dok_matrix, csc_matrix
import tables
import numpy as np
def write_sparse_hdf(handle, group, data, colnames=None, filt=None):
    """
    Store a sparse matrix under an HDF5 group in CSC (compressed sparse
    column) layout.

    The following arrays are created as children of ``group``:

    * ``v_indices``   -- row index of every stored value (32-bit int)
    * ``v_indptr``    -- column pointer array (32-bit int)
    * ``v_data``      -- the stored values (32-bit int or 32-bit float)
    * ``v_datadims``  -- the two matrix dimensions (32-bit int)
    * ``v_col_names`` -- only written when ``colnames`` is non-empty

    Everything is stored as 32 bit for now.

    :param handle: open, writable PyTables file; must provide
        ``create_carray``.
    :param group: group (or path to a group) that receives the arrays.
    :param data: scipy sparse matrix; it is converted to CSC before writing.
    :param colnames: optional sequence of column names.
    :param filt: optional filters object forwarded to every
        ``create_carray`` call.
    """
    # ``indices``/``indptr`` only describe columns for a CSC matrix. Convert
    # first so that CSR input is not silently written transposed and formats
    # without these attributes (COO, DOK, ...) do not fail.
    data = data.tocsc()

    # ``np.integer`` covers every integer width; the Python ``int`` type is
    # not a reliable stand-in for "any integer dtype" in ``issubdtype``.
    if np.issubdtype(data.dtype, np.integer):
        value_atom = tables.Int32Atom()
    else:
        value_atom = tables.Float32Atom()

    def _store(name, atom, values, title):
        # Create a 1-D array node and fill it by slice assignment.
        node = handle.create_carray(
            group, name,
            atom=atom, shape=(len(values),),
            title=title,
            filters=filt)
        node[:] = values

    _store('v_indices', tables.Int32Atom(), data.indices, "indices")
    _store('v_indptr', tables.Int32Atom(), data.indptr, "index ptr")
    _store('v_data', value_atom, data.data, "data")
    _store('v_datadims', tables.Int32Atom(), data.shape, "data dims")

    # Explicit None/length test: plain truthiness raises for numpy arrays
    # holding more than one name.
    if colnames is not None and len(colnames) > 0:
        # Fixed-width string atom sized to fit the longest name.
        widest = max(len(n) for n in colnames)
        _store('v_col_names', tables.StringAtom(widest), colnames,
               "col names")
def read_sparse_hdf(handle, group):
    """
    Load a sparse CSC (compressed sparse column) matrix from an HDF5 group.

    Expects the child arrays ``v_data``, ``v_indices``, ``v_indptr`` and
    ``v_datadims`` under ``group``. Column names are not read back.

    :param handle: open PyTables file; must provide ``get_node(where, name)``.
    :param group: group (or path to a group) holding the arrays.
    :returns: a ``scipy.sparse.csc_matrix`` with the stored shape.
    """
    def _load(name):
        # Nodes are looked up through the file handle; ``[:]`` reads the
        # whole array so scipy receives array data rather than a node object.
        return handle.get_node(group, name)[:]

    n_rows, n_cols = (int(d) for d in _load('v_datadims'))
    # CSC construction takes the flat 3-tuple (data, indices, indptr); the
    # nested (data, (a, b)) form means coordinate (row, col) pairs instead.
    return csc_matrix(
        (_load('v_data'), _load('v_indices'), _load('v_indptr')),
        shape=(n_rows, n_cols))
# if colnames:
# handle.create_carray(group,'v_col_names',
# atom=tables.StringAtom(
# max([len(n) for n in colnames])
# ), shape=(len(colnames),),
# title="col names",
# filters=filt)[:] = colnames
library(rhdf5)
# Load a sparse matrix from the HDF5 file `filename`.
#
# Reads the datasets v_indices, v_indptr, v_data and v_datadims found under
# the group `path` and assembles them into a column-compressed sparse matrix.
# If a v_col_names dataset can also be read, it is used as the column names.
#
# filename: path to the HDF5 file.
# path:     HDF5 group containing the v_* datasets.
# Returns the sparse matrix built by Matrix::sparseMatrix.
load.sparse.hdf = function (filename, path) {
  # Read one dataset below `path` and flatten it to a plain vector.
  read.part = function (name) {
    as.vector(h5read(filename, paste(path, name, sep="/")))
  }
  # sparseMatrix lives in the Matrix package, which this script never
  # attaches, so call it through its namespace.
  data = Matrix::sparseMatrix(
    i=read.part("v_indices"),
    p=read.part("v_indptr"),
    x=read.part("v_data"),
    dims=read.part("v_datadims"),
    index1=FALSE  # the stored indices are 0-based
  )
  # Column names are optional in the file; a failed read is treated as
  # "no names stored" rather than an error.
  col.names = tryCatch(read.part("v_col_names"), error=function(e) NULL)
  if (!is.null(col.names)) {
    colnames(data) = col.names
  }
  return(data)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment