Skip to content

Instantly share code, notes, and snippets.

@denis-bz
Last active August 21, 2020 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save denis-bz/be7bd7382cba105678e92138faede537 to your computer and use it in GitHub Desktop.
Save denis-bz/be7bd7382cba105678e92138faede537 to your computer and use it in GitHub Desktop.
average e.g. 4 x 4 blocks in a sparse matrix A 16 Jun 2020
#!/usr/bin/env python
"""average e.g. 4 x 4 blocks in a sparse matrix A
N x N -> N/4 x N/4, nnz roughly *= blksize
why: reduce big matrices to plot, q+d approximate inverse
Keywords: sparse-matrix, python, scipy, data-compression
"""
# pretty fast -- coo sums duplicate (i,j) entries
from __future__ import division, print_function
import sys
import numpy as np
from numpy.linalg import norm
from scipy import sparse
try:
from zutil import arraydesc
except ImportError:
pass
#...............................................................................
def avblocks( A, blksize=4, verbose=1 ):
"""average e.g. 4 x 4 blocks in sparse matrix A """
assert sparse.issparse( A ), type(A).__name__
if verbose:
print( "\navblocks in: %s blksize %d " % (
arraydesc( A ), blksize ))
I, J, data = sparse.find( A ) # row, col
I //= blksize
J //= blksize
# average = sum / count --
a = sparse.coo_matrix(( data, (I, J) ))
N = sparse.coo_matrix(( np.ones_like( data ), (I, J) ))
a = a.tocsr() # coo: duplicate (i,j) entries will be summed !
N = N.tocsr()
N.data = (1 / blksize**2) / N.data # np.divide: error, not owndata ?
a = a.multiply( N )
if verbose:
print( "avblocks out:", arraydesc( a ))
return a
if __name__ == "__main__":
import sys
from randomsparse import randomsparse
np.set_printoptions( threshold=20, edgeitems=10, linewidth=120, formatter = dict(
float = lambda x: "%.2g" % x )) # float arrays %.2g
print( 80 * "▄" )
#...............................................................................
n = 1e6 # 1e6 4 4: run -t 3 sec
blksize = 3 # 19 3 == 21 3 with 0 fill
avrow = 4
# to change these params, run this.py a=1 b=None 'c = expr' ... in sh or ipython --
for arg in sys.argv[1:]:
exec( arg )
A = randomsparse( n, n, density = avrow / n ) * 100
a = avblocks( A, blksize, verbose=1 )
print( "A: \n", A.A )
print( "avblocks * %d: \n%s" % (
blksize**2, a.A * blksize**2 ))
#!/usr/bin/env python
""" Fast random m x n matrix with ~ m * n * density non-zeros
distrib = uniform normal laplace ...
with numpy >= 1.17, much faster for large m*n than
https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.random.html
"""
# eigenvalues: see https://en.wikipedia.org/wiki/Circular_law
# v sparse => all-0 cols, null space
from __future__ import division, print_function
import sys
import numpy as np
from scipy import sparse
__version__ = "2020-05-31 May" # denis-bz-py t-online.de
#...........................................................................
def randomsparse( m, n, density=4, symmetric=False, posdef=False, incdiag=0,
distrib="uniform", distribargs=dict( low=-1, high=1 ), seed=0,
verbose=1 ):
""" -> sparse m x n csc_matrix with ~ m * n * density non-zeros
density > 1, e.g. 4: average 4 non-zeros per row, 4/n overall
symmetric: A + A.T
posdef: A * A.T
incdiag: add incdiag * I: shifts eigenvalues, no all-0 rows or columns
distrib = np.random uniform normal laplace ... with distribargs
default uniform -1 .. 1
for other distributions, just overwrite e.g. A.data = np.sign( A.data )
seed: an int, see np.random.default_rng or old RandomState
"""
m = int(m)
n = int(n)
if density > 1: # e.g. 4: average 4 non-zeros per row
density /= n
if symmetric or posdef:
density /= 2 # posdef ?
k = int( round( m * n * density ))
# numpy >= 1.17 has much faster .choice
randomgen = np.random.default_rng( seed ) if hasattr( np.random, "default_rng" ) \
else np.random.RandomState( seed ) # old numpy
ind = randomgen.choice( m*n, size=k, replace=False ) # no dups, wikipedia Knuth shuffle
i = ind // n
j = ind % n
# could tmg i, j -> triangular or symmetric
distribf = getattr( randomgen, distrib ) # uniform normal ...
vals = distribf( size=len(ind), **distribargs )
A = sparse.coo_matrix( (vals, (i, j)), shape=(m, n) ) # may have dups, tocsc sums them !
if posdef:
A *= A.T # density ?
elif symmetric:
A += A.T
if incdiag != 0:
A += incdiag * sparse.eye( A.shape[0] )
A = A.tocsc()
A.sort_indices() # for umfpack
if verbose:
print( "randomsparse:", _desc( A, distrib, incdiag, seed ),
"symmetric" if symmetric else "",
"posdef" if posdef else "" )
if verbose > 1:
print( A[:20,:20].A ) # with caller's printoptions
return A
def _desc( A, distrib, incdiag, seed ):
m, n = A.shape
return "%s %.3g nnz/row incdiag %.3g distrib %s %.3g .. %.3g seed %d " % (
A.shape, A.nnz / n, incdiag, distrib, A.min(), A.max(), seed )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment