{{ message }}

Instantly share code, notes, and snippets.

# jakevdp/banded_tools.py

Created Dec 23, 2011
Benchmarks for eigenvalue decomposition
 from time import time import numpy as np from scipy.sparse import spdiags, issparse, dia_matrix from scipy.sparse.linalg import factorized from scipy import linalg as splinalg class BandedMatrix(object): def __init__(self, data, lu=None): if issparse(data): if lu: raise ValueError("Data type not understood") M = data.todia() Nd_u = max(M.offsets) Nd_l = -min(M.offsets) self.data_ = np.zeros((1 + Nd_u + Nd_l, min(M.shape))) self.data_[Nd_u - M.offsets, :M.data.shape[1]] = M.data self.l_ = Nd_l self.u_ = Nd_u else: self.data_ = data self.l_ = lu[0] self.u_ = lu[1] assert self.data_.shape[0] == 1 + self.l_ + self.u_ assert self.u_ < self.data_.shape[1] assert self.l_ < self.data_.shape[1] shape = property(lambda self: (self.data_.shape[1], self.data_.shape[1])) lu = property(lambda self: (self.l_, self.u_)) data = property(lambda self: self.data_) def tocsr(self): return spdiags(self.data, range(-self.l_, self.u_ + 1)[::-1], *self.shape, format='csr') def tocsc(self): return spdiags(self.data, range(-self.l_, self.u_ + 1)[::-1], *self.shape, format='csc') def todia(self): return spdiags(self.data, range(-self.l_, self.u_ + 1)[::-1], *self.shape, format='dia') def toarray(self): return self.todia().toarray() def todense(self): return self.todia().todense() def to_symmetric(self, lower=False): if lower: return SymmetricBandedMatrix(self.data_[self.u_:], lower=True) else: return SymmetricBandedMatrix(self.data_[:self.u_ + 1], lower=False) def diag(self, d=0): """ return a view of the d-th diagonal """ assert d == 0 return self.data_[self.u_] class SymmetricBandedMatrix(object): def __init__(self, data, lower=False): if issparse(data): M = data.todia() if lower: i_lower = np.where(M.offsets <= 0) Nd = -min(M.offsets) self.data_ = np.zeros((Nd + 1, M.shape[1])) self.data_[-M.offsets[i_lower], :M.data.shape[1]] = M.data[i_lower] else: i_upper = np.where(M.offsets >= 0) Nd = max(M.offsets) self.data_ = np.zeros((Nd + 1, M.shape[1])) self.data_[Nd - M.offsets[i_upper], :M.data.shape[1]] = M.data[i_upper] else: self.data_ = data self.lower_ = lower data = property(lambda self: self.data_) lower = property(lambda self: self.lower_) shape = property(lambda self: (self.data_.shape[1], self.data_.shape[1])) def _nonsym_data(self): Nd = self.data_.shape[0] - 1 newdata = np.zeros((1 + 2 * Nd, self.data_.shape[1])) if self.lower_: newdata[-Nd - 1:] = self.data_ for i in range(Nd): newdata[Nd - i - 1, i + 1:] = newdata[Nd + 1 + i, : -1 - i] else: newdata[:Nd + 1] = self.data_ for i in range(Nd): newdata[Nd + 1 + i, :-1 - i] = newdata[Nd - i - 1, i + 1:] return newdata def tocsr(self): Nd = self.data_.shape[0] - 1 return spdiags(self._nonsym_data(), np.arange(-Nd, Nd + 1)[::-1], *self.shape, format='csr') def tocsc(self): Nd = self.data_.shape[0] - 1 return spdiags(self._nonsym_data(), np.arange(-Nd, Nd + 1)[::-1], *self.shape, format='csc') def todia(self): Nd = self.data_.shape[0] - 1 return spdiags(self._nonsym_data(), np.arange(-Nd, Nd + 1)[::-1], *self.shape, format='dia') def toarray(self): return self.todia().toarray() def todense(self): return self.todia().todense() def togeneral(self): Nd = self.data_.shape[0] - 1 return BandedMatrix(self._nonsym_data(), (Nd, Nd)) def diag(self, d=0): """ return a view of the d-th diagonal """ assert d == 0 if self.lower_: return self.data_[0] else: return self.data_[-1] def solve_banded(M, b, overwrite_M=False, overwrite_b=False): assert M.__class__ == BandedMatrix return splinalg.solve_banded(M.lu, M.data, b, overwrite_ab=overwrite_M, overwrite_b=overwrite_b) def solveh_banded(M, b, overwrite_M=False, overwrite_b=False): assert M.__class__ == SymmetricBandedMatrix return splinalg.solveh_banded(M.data, b, overwrite_ab=overwrite_M, overwrite_b=overwrite_b) def test_tools(): N = 6 nnz = 6 X = np.zeros((N, N)) ind = np.arange(nnz, dtype=int) X[ind, ind] = 1 X[ind[1:], ind[:-1]] = 2 + 0.1 * np.arange(nnz - 1) X[ind[:-1], ind[1:]] = 3 + 0.1 * np.arange(nnz - 1) X[ind[:-2], ind[2:]] = 4 + 0.1 * np.arange(nnz - 2) from scipy.sparse import dia_matrix print X X_dia = dia_matrix(X) X_banded = BandedMatrix(X_dia) X_up = SymmetricBandedMatrix(X_dia) X_lo = SymmetricBandedMatrix(X_dia, lower=True) print np.all(X_up.toarray() == X_up.togeneral().toarray()) print np.all(X_lo.toarray() == X_lo.togeneral().toarray()) print np.all(X_banded.toarray() == X_dia.toarray()) print print X_banded.to_symmetric(lower=True).toarray() print print X_banded.to_symmetric(lower=False).toarray() def test_solvers(dim=128 ** 2): Nd = 2 N = 1 + 2 * Nd data = np.ones((N, dim)) for i in range(1, Nd + 1): data[i: N - i] *= 1.633 X_dia = spdiags(data, 2 * np.arange(-Nd, Nd + 1), dim, dim) b = np.random.random(dim) # subtract 1 from the diagonal print "super-lu" t_1 = time() X_csc = X_dia.tocsc() t0 = time() fac = factorized(X_csc) t1 = time() x0 = fac(b) t2 = time() print ' - convert to csc: %.2g sec' % (t0 - t_1) print ' - factorize: %.2g sec' % (t1 - t0) print ' - solve: %.2g sec' % (t2 - t1) print "banded general:" t_1 = time() X_gbn = BandedMatrix(X_dia) t0 = time() x1 = solve_banded(X_gbn, b) t1 = time() print ' - convert to banded: %.2g' % (t0 - t_1) print ' - solve: %.2g sec' % (t1 - t0) print "banded symmetric:" t_1 = time() X_sbn = SymmetricBandedMatrix(X_dia) t0 = time() x2 = solveh_banded(X_sbn, b) t1 = time() print ' - convert to banded: %.2g' % (t0 - t_1) print ' - solve: %.2g sec' % (t1 - t0) print print "Results match:", print np.allclose(x0, x1), print np.allclose(x1, x2) if __name__ == '__main__': test_solvers()
 """ benchmarks for eigensolution of the Laplacian matrix. Note: The banded version is not as efficient as it could be: it should be upgraded to scipy.linalg.solveh_banded. Better, the LAPACK routine should be used to first preprocess the banded matrix by converting it into a tridiagonal matrix, and performing the solution using this. These routines are not currently wrapped in scipy, so we'd have to interface with LAPACK directly. """ from time import time import numpy as np import scipy as sp from scipy.linalg import solve_banded, solve, eig_banded from scipy.sparse import spdiags from scipy.sparse.linalg import LinearOperator, factorized from sklearn.utils.arpack import eigsh from sklearn.feature_extraction import image from sklearn.utils.graph import graph_laplacian import banded_tools def get_Laplacian_matrix(num_downsamples = 2): """ Construct the laplacian matrix used in the plot_lena_segmentation example. Some of the code in this function is from """ lena = sp.misc.lena() # downsample pixels by factors of two for i in range(num_downsamples): lena = (lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2]) # Convert the image into a graph with the value of the gradient on the # edges. adjacency = image.img_to_graph(lena) # Take a decreasing function of the gradient: an exponential # The smaller beta is, the more independant the segmentation is of the # actual image. For beta=1, the segmentation is close to a voronoi beta = 5 eps = 1e-6 adjacency.data = np.exp(-beta*adjacency.data/lena.std()) + eps # Compute the negative laplacian of the adjacency (this is from # sklearn.cluster.spectral_embedding) laplacian, dd = graph_laplacian(adjacency, normed=True, return_diag=True) # Set diagonal of Laplacian to zero laplacian = laplacian.tocoo() diag_idx = (laplacian.row == laplacian.col) laplacian.data[diag_idx] = 0 # Convert to diagonal matrix: this gives the most efficient # matrix/vector products return laplacian.todia() def construct_OPinv_banded(M_banded): """ construct LinearOperator OPinv such that if (M - I) * x = b then x = OPinv * b for vectors x and b, where I is the identity matrix """ Nd = (M_banded.shape[0] - 1) / 2 N = M_banded.shape[1] OP_banded = M_banded.copy() OP_banded[Nd] -= 1 matvec = lambda b: solve_banded((Nd, Nd), OP_banded, b) return LinearOperator((N, N), matvec = matvec) def bench_sparse_solvers(num_downsamples=2, rseed=None, super_lu=True, banded_gen=True, banded_sym=True): print "creating Laplacian matrix:" M_dia = -get_Laplacian_matrix(num_downsamples) print " - shape of matrix:", M_dia.shape # subtract 1 from the diagonal i = np.where(M_dia.offsets == 0) M_dia.data[i] -= 1 if rseed is not None: np.random.seed(rseed) b = np.zeros(M_dia.shape[1]) res = [] if super_lu: print "super-lu" t_1 = time() M_csc = M_dia.tocsc() t0 = time() fac = factorized(M_csc) t1 = time() x = fac(b) t2 = time() print ' - convert to csc: %.2g sec' % (t0 - t_1) print ' - factorize: %.2g sec' % (t1 - t0) print ' - solve: %.2g sec' % (t2 - t1) res.append(x) if banded_gen: print "banded general:" t_1 = time() M_gbn = banded_tools.BandedMatrix(M_dia) t0 = time() x = banded_tools.solve_banded(M_gbn, b) t1 = time() print ' - convert to banded: %.2g' % (t0 - t_1) print ' - solve: %.2g sec' % (t1 - t0) res.append(x) if banded_sym: print "banded symmetric:" t_1 = time() M_sbn = banded_tools.SymmetricBandedMatrix(-M_dia) d = M_sbn.diag() d += 1E-8 t0 = time() x = banded_tools.solveh_banded(M_sbn, b) t1 = time() print ' - convert to banded: %.2g' % (t0 - t_1) print ' - solve: %.2g sec' % (t1 - t0) res.append(x) for r in res: print r[:6] print print "Results match:", for i in range(len(res)): print np.allclose(res[i - 1], res[i]) print def bench_eigendecompositions(num_downsamples=2, nev=11, tol=1E-6, eigsh_direct=True, eigsh_invert=True, eigsh_invert_banded=True, eigsh_invert_buckling=True, eigsh_invert_cayley=True, check_equivalence=True): """ Run benchmark tests of different eigenvalue decomposition methods """ print "creating Laplacian matrix:" M_dia = -get_Laplacian_matrix(num_downsamples) print " - shape of matrix:", M_dia.shape print print "constructing banded representation:" M_banded = banded_tools.BandedMatrix(M_dia) print " - banded storage shape:", M_banded.data.shape if check_equivalence: print print "checking that conversion was successful:" M_csr = M_banded.tocsr() v = np.random.random(M_banded.shape[1]) t0 = time() x1 = M_dia * v t1 = time() x2 = M_csr * v t2 = time() print " - matrix products match:", np.allclose(x1, x2) print " [dia_matrix mat-vec product: %.2g sec]" % (t1 - t0) print " [csr_matrix mat-vec product: %.2g sec]" % (t2 - t1) print 75 * '_' print "Computing %i eigenvalues with tol=%.1g" % (nev, tol) if eigsh_direct: print print "eigsh: direct mode" t0 = time() evals, evecs = eigsh(M_dia, nev, which='LA', tol=tol) t1 = time() print " - finished in %.2g sec" % (t1 - t0) print "eigenvalues:" print evals if eigsh_invert: print print "eigsh: shift-invert mode" t0 = time() evals, evecs = eigsh(M_dia, nev, sigma=1, which='LM', tol=tol) t1 = time() print " - finished in %.2g sec" % (t1 - t0) print "eigenvalues:" print evals if eigsh_invert_banded: print print "eigsh: shift-invert mode (banded representation)" t0 = time() evals, evecs = eigsh(M_dia, nev, sigma=1, OPinv=construct_OPinv_banded(M_banded.data), which='LM', tol=tol) t1 = time() print " - finished in %.2g sec" % (t1 - t0) print "eigenvalues:" print evals if eigsh_invert_buckling: print print "eigsh: shift-invert/buckling mode" t0 = time() evals, evecs = eigsh(M_dia, nev, sigma=1, which='LM', tol=tol, mode='buckling') t1 = time() print " - finished in %.2g sec" % (t1 - t0) print "eigenvalues:" print evals if eigsh_invert_cayley: print print "eigsh: shift-invert/cayley mode" t0 = time() evals, evecs = eigsh(M_dia, nev, sigma=1, which='LM', tol=tol, mode='cayley') t1 = time() print " - finished in %.2g sec" % (t1 - t0) print "eigenvalues:" print evals if __name__ == '__main__': #bench_sparse_solvers() bench_eigendecompositions()
 Related to https://github.com/scikit-learn/scikit-learn/pull/50 Summary: - shift-invert mode is fast, especially with newer versions of scipy (probably due to improvements in SuperLU?) - using the LAPACK banded functionality doesn't help much. Wrapping LAPACK's DSBTRD and DPTSV may improve this a bit, but I'm not sure it's worth it given the performance of later scipy versions - on the 16384 x 16384 array used in the plot_lena_segmentation.py example, newer versions of scipy with a simple shift-invert converge to the solution in < 1 sec with tol=1E-12. This should work fine in practice.
 Scipy 0.10 ========== In [3]: scipy.__version__ Out[3]: '0.10.0.dev' In [4]: bench_eigendecompositions(tol=1E-12) creating Laplacian matrix: - shape of matrix: (16384, 16384) constructing banded representation: - banded storage shape: (257, 16384) checking that conversion was successful: - matrix products match: True [dia_matrix mat-vec product: 0.00063 sec] [csr_matrix mat-vec product: 0.0005 sec] ___________________________________________________________________________ Computing 11 eigenvalues with tol=1e-12 eigsh: direct mode - finished in 55 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode - finished in 1 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode (banded representation) - finished in 41 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/buckling mode - finished in 1 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/cayley mode - finished in 1 sec eigenvalues: [ 0.99948885 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] Scipy 0.9 ========= In [3]: scipy.__version__ Out[3]: '0.9.1.dev' In [4]: bench_eigendecompositions(tol=1E-12) creating Laplacian matrix: - shape of matrix: (16384, 16384) constructing banded representation: - banded storage shape: (257, 16384) checking that conversion was successful: - matrix products match: True [dia_matrix mat-vec product: 0.00062 sec] [csr_matrix mat-vec product: 0.00051 sec] ___________________________________________________________________________ Computing 11 eigenvalues with tol=1e-12 eigsh: direct mode - finished in 56 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode - finished in 1.1 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode (banded representation) - finished in 42 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/buckling mode - finished in 1 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/cayley mode - finished in 1 sec eigenvalues: [ 0.99948885 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] Scipy 0.8 ========= In [5]: scipy.__version__ Out[5]: '0.8.0.dev' In [6]: bench_eigendecompositions(tol=1E-12) creating Laplacian matrix: - shape of matrix: (16384, 16384) constructing banded representation: - banded storage shape: (257, 16384) checking that conversion was successful: - matrix products match: True [dia_matrix mat-vec product: 0.00066 sec] [csr_matrix mat-vec product: 0.0005 sec] ___________________________________________________________________________ Computing 11 eigenvalues with tol=1e-12 eigsh: direct mode - finished in 55 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode - finished in 1 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode (banded representation) - finished in 50 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/buckling mode - finished in 1.5 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/cayley mode - finished in 1.6 sec eigenvalues: [ 0.99948885 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ]
 Scipy 0.9 ========= In [11]: scipy.__version__ Out[11]: '0.9.1.dev' In [12]: bench_eigendecompositions() creating Laplacian matrix: - shape of matrix: (16384, 16384) constructing banded representation: - banded storage shape: (257, 16384) checking that conversion was successful: - matrix products match: True [dia_matrix mat-vec product: 0.00065 sec] [csr_matrix mat-vec product: 0.00051 sec] ___________________________________________________________________________ Computing 11 eigenvalues with tol=1e-06 eigsh: direct mode - finished in 37 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode - finished in 0.8 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode (banded representation) - finished in 39 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/buckling mode - finished in 1.2 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/cayley mode - finished in 1.3 sec eigenvalues: [ 0.99948885 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] Scipy 0.8 ========= In [3]: scipy.__version__ Out[3]: '0.8.0.dev' In [4]: bench_eigendecompositions() creating Laplacian matrix: - shape of matrix: (16384, 16384) constructing banded representation: - banded storage shape: (257, 16384) checking that conversion was successful: - matrix products match: True [dia_matrix mat-vec product: 0.00064 sec] [csr_matrix mat-vec product: 0.00051 sec] ___________________________________________________________________________ Computing 11 eigenvalues with tol=1e-06 eigsh: direct mode - finished in 37 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode - finished in 0.81 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode (banded representation) - finished in 30 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/buckling mode - finished in 0.88 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/cayley mode - finished in 0.96 sec eigenvalues: [ 0.99948885 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] Scipy 0.7 ========= In [4]: scipy.__version__ Out[4]: '0.7.0' In [5]: bench_eigendecompositions() creating Laplacian matrix: - shape of matrix: (16384, 16384) constructing banded representation: - banded storage shape: (257, 16384) checking that conversion was successful: - matrix products match: True [dia_matrix mat-vec product: 0.00068 sec] [csr_matrix mat-vec product: 0.00059 sec] ___________________________________________________________________________ Computing 11 eigenvalues with tol=1e-06 eigsh: direct mode - finished in 74 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode - finished in 19 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert mode (banded representation) - finished in 31 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/buckling mode - finished in 20 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ] eigsh: shift-invert/cayley mode - finished in 20 sec eigenvalues: [ 0.99948884 0.99958284 0.99966101 0.99970649 0.99973406 0.99977508 0.99983923 0.99989525 0.99993584 0.9999775 1. ]