Last active
February 15, 2016 07:06
-
-
Save kokumura/3cfb9f0e9742dad1b1c4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import io
try:
    from StringIO import StringIO  # Python 2 only
except ImportError:
    # Python 3 has no StringIO module; BytesIO is the binary-safe equivalent.
    from io import BytesIO as StringIO

import numpy as np
import scipy.sparse as sps
def _np_savez(compressed=False, *args, **kwds): | |
sio = StringIO() | |
if compressed: | |
np.savez_compressed(sio, *args, **kwds) | |
else: | |
np.savez(sio, *args, **kwds) | |
sio.seek(0) | |
return sio.read() | |
def save_sparse_matrix(x, numpy_savez_compression=False):
    """Serialize a sparse matrix and return the result of ``_np_savez``.

    CSC and CSR matrices are stored in their own layout; every other input
    (COO included) is stored through the COO writer.

    Args:
        x: The sparse matrix to serialize.
        numpy_savez_compression: Passed to ``_np_savez`` as ``compressed``.
    """
    if sps.isspmatrix_csc(x):
        to_fields = _save_sparse_matrix_csc
    elif sps.isspmatrix_csr(x):
        to_fields = _save_sparse_matrix_csr
    else:
        # COO input and any remaining format share the COO writer.
        to_fields = _save_sparse_matrix_coo
    return _np_savez(compressed=numpy_savez_compression, **to_fields(x))
def load_sparse_matrix(data):
    """Rebuild a sparse matrix from bytes produced by ``save_sparse_matrix``.

    Args:
        data: The raw bytes of the ``.npz`` archive.

    Returns:
        The matrix built by the loader matching the archive's ``type`` entry
        (``'csc'``, ``'csr'`` or ``'coo'``).

    Raises:
        ValueError: If the ``type`` entry is none of the known formats.
    """
    # The archive is binary: io.BytesIO works on Python 2.6+ and Python 3.
    # The ``with`` block closes the NpzFile once the arrays have been read.
    with np.load(io.BytesIO(data)) as obj:
        type_ = obj['type'].item()
        if isinstance(type_, bytes):
            # An archive written on Python 2 stores the tag as a byte string.
            type_ = type_.decode('ascii')
        if type_ == 'csc':
            return _load_sparse_matrix_csc(obj)
        elif type_ == 'csr':
            return _load_sparse_matrix_csr(obj)
        elif type_ == 'coo':
            return _load_sparse_matrix_coo(obj)
    raise ValueError('invalid type')
def _save_sparse_matrix_coo(x): | |
xx = x.tocoo() | |
return { | |
'type': 'coo', | |
'shape': xx.shape, | |
'ri': xx.row, | |
'ci': xx.col, | |
'data': xx.data, | |
} | |
def _load_sparse_matrix_coo(obj): | |
return sps.coo_matrix( | |
(np.array(obj['data'],dtype=np.float64), (obj['ri'], obj['ci'])), | |
shape=obj['shape'], | |
) | |
def _save_sparse_matrix_csc(x): | |
xx = x.tocsc() | |
return { | |
'type': 'csc', | |
'shape': xx.shape, | |
'ix': xx.indices, | |
'ip': xx.indptr, | |
'data': xx.data, | |
} | |
def _load_sparse_matrix_csc(obj): | |
return sps.csc_matrix( | |
(np.array(obj['data'],dtype=np.float64), obj['ix'], obj['ip']), | |
shape=obj['shape'], | |
) | |
def _save_sparse_matrix_csr(x): | |
xx = x.tocsr() | |
return { | |
'type': 'csr', | |
'shape': xx.shape, | |
'ix': xx.indices, | |
'ip': xx.indptr, | |
'data': xx.data, | |
} | |
def _load_sparse_matrix_csr(obj): | |
return sps.csr_matrix( | |
(np.array(obj['data'],dtype=np.float64), obj['ix'], obj['ip']), | |
shape=obj['shape'], | |
) | |
numpy.savez() を使って書きなおした。
テストに使ったデータ(4000x4000程度の行列)では、 savez_compressed() を使うよりも、savez() で直列化してから zlib.compress() で圧縮するほうが圧縮率が高かった。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
たぶん np.save()、np.savez() を使ったほうがよい。