Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# -*- coding: utf-8 -*-
from io import BytesIO

import numpy as np
import scipy.sparse as sps

try:
    from StringIO import StringIO  # Python 2 only
except ImportError:
    from io import StringIO  # Python 3: the StringIO module was removed
def _np_savez(compressed=False, *args, **kwds):
    """Serialize arrays into an in-memory ``.npz`` archive and return it.

    Args:
        compressed: When true, write with ``np.savez_compressed``;
            otherwise write with ``np.savez``.
        *args: Positional arrays, forwarded unchanged to the numpy writer.
        **kwds: Named arrays, forwarded unchanged to the numpy writer.

    Returns:
        The raw bytes of the archive.
    """
    writer = np.savez_compressed if compressed else np.savez
    # An .npz archive is binary data, so it must go into a bytes buffer;
    # a text buffer cannot hold it on Python 3.
    with BytesIO() as buf:
        writer(buf, *args, **kwds)
        return buf.getvalue()
def save_sparse_matrix(x, numpy_savez_compression=False):
    """Serialize a sparse matrix through ``_np_savez``.

    CSC and CSR inputs are stored in their own layout; every other input is
    stored in COO layout.

    Args:
        x: The sparse matrix to serialize.
        numpy_savez_compression: Passed to ``_np_savez`` as ``compressed``.

    Returns:
        Whatever ``_np_savez`` returns for the chosen field dictionary.
    """
    native_encoders = (
        (sps.isspmatrix_csc, _save_sparse_matrix_csc),
        (sps.isspmatrix_csr, _save_sparse_matrix_csr),
    )
    # COO is the layout for COO input and also the fallback for anything else.
    encode = _save_sparse_matrix_coo
    for is_format, encoder in native_encoders:
        if is_format(x):
            encode = encoder
            break
    return _np_savez(compressed=numpy_savez_compression, **encode(x))
def load_sparse_matrix(data):
    """Rebuild a sparse matrix from a serialized ``.npz`` archive.

    Args:
        data: Raw bytes of an ``.npz`` archive holding a ``type`` entry
            ('csc', 'csr' or 'coo') plus the arrays for that layout.

    Returns:
        The matrix built by the loader that matches the stored ``type``.

    Raises:
        ValueError: If the stored ``type`` is not one of the three layouts.
    """
    # The archive is binary, so it is wrapped in a bytes buffer. The context
    # manager closes the NpzFile once the arrays have been read.
    with np.load(BytesIO(data)) as obj:
        # The tag is stored as a 0-d array; unwrap it to a plain scalar.
        type_ = obj['type'].item()
        # A byte-string tag would never equal the text literals below on
        # Python 3, so decode it first (no-op on Python 2, where bytes is str).
        if isinstance(type_, bytes) and not isinstance(type_, str):
            type_ = type_.decode('ascii', 'replace')
        if type_ == 'csc':
            return _load_sparse_matrix_csc(obj)
        if type_ == 'csr':
            return _load_sparse_matrix_csr(obj)
        if type_ == 'coo':
            return _load_sparse_matrix_coo(obj)
    raise ValueError('invalid type')
def _save_sparse_matrix_coo(x):
    """Describe ``x`` in COO layout as a dictionary of fields.

    The input is converted with ``tocoo()``. The result holds the tag
    ``'coo'``, the shape, the row indices (``ri``), the column indices
    (``ci``) and the stored values (``data``).
    """
    coo = x.tocoo()
    fields = {'type': 'coo', 'shape': coo.shape}
    fields['ri'] = coo.row
    fields['ci'] = coo.col
    fields['data'] = coo.data
    return fields
def _load_sparse_matrix_coo(obj):
    """Build a ``coo_matrix`` from the fields of a stored COO record.

    Reads ``data``, ``ri``, ``ci`` and ``shape`` from ``obj``. The values are
    converted to ``float64`` before the matrix is constructed.
    """
    values = np.array(obj['data'], dtype=np.float64)
    rows = obj['ri']
    cols = obj['ci']
    return sps.coo_matrix((values, (rows, cols)), shape=obj['shape'])
def _save_sparse_matrix_csc(x):
    """Describe ``x`` in CSC layout as a dictionary of fields.

    The input is converted with ``tocsc()``. The result holds the tag
    ``'csc'``, the shape, the index array (``ix``), the index-pointer array
    (``ip``) and the stored values (``data``).
    """
    csc = x.tocsc()
    entries = [
        ('type', 'csc'),
        ('shape', csc.shape),
        ('ix', csc.indices),
        ('ip', csc.indptr),
        ('data', csc.data),
    ]
    return dict(entries)
def _load_sparse_matrix_csc(obj):
    """Build a ``csc_matrix`` from the fields of a stored CSC record.

    Reads ``data``, ``ix``, ``ip`` and ``shape`` from ``obj``. The values are
    converted to ``float64`` before the matrix is constructed.
    """
    values = np.array(obj['data'], dtype=np.float64)
    indices = obj['ix']
    indptr = obj['ip']
    return sps.csc_matrix((values, indices, indptr), shape=obj['shape'])
def _save_sparse_matrix_csr(x):
    """Describe ``x`` in CSR layout as a dictionary of fields.

    The input is converted with ``tocsr()``. The result holds the tag
    ``'csr'``, the shape, the index array (``ix``), the index-pointer array
    (``ip``) and the stored values (``data``).
    """
    csr = x.tocsr()
    record = {}
    record['type'] = 'csr'
    record['shape'] = csr.shape
    record['ix'] = csr.indices
    record['ip'] = csr.indptr
    record['data'] = csr.data
    return record
def _load_sparse_matrix_csr(obj):
    """Build a ``csr_matrix`` from the fields of a stored CSR record.

    Reads ``data``, ``ix``, ``ip`` and ``shape`` from ``obj``. The values are
    converted to ``float64`` before the matrix is constructed.
    """
    values = np.array(obj['data'], dtype=np.float64)
    indices = obj['ix']
    indptr = obj['ip']
    return sps.csr_matrix((values, indices, indptr), shape=obj['shape'])
@kokumura
Copy link
Author

kokumura commented Feb 12, 2016

たぶん np.save() や np.savez() を使ったほうがよい。

@kokumura
Copy link
Author

kokumura commented Feb 15, 2016

numpy.savez() を使って書きなおした。

テストに使ったデータ(4000x4000 程度の行列)では、savez_compressed() を使うよりも、savez() で直列化してから zlib.compress() で圧縮するほうが圧縮率が高かった。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment