denis-bz/av_sparse_blocks.py

## av_sparse_blocks.py
#!/usr/bin/env python
"""average e.g. 4 x 4 blocks in a sparse matrix A
    N x N -> N/4 x N/4, nnz roughly *= blksize
    why: reduce big matrices to plot, q+d approximate inverse

    Keywords: sparse-matrix, python, scipy, data-compression
"""
    # pretty fast -- coo sums duplicate (i,j) entries

from __future__ import division, print_function
import sys
import numpy as np
from numpy.linalg import norm
from scipy import sparse

try:
    from zutil import arraydesc
except ImportError:
    pass


#...............................................................................
def avblocks( A, blksize=4, verbose=1 ):
    """average e.g. 4 x 4 blocks in sparse matrix A """
    assert sparse.issparse( A ), type(A).__name__
    if verbose:
        print( "\navblocks in: %s  blksize %d " % (
                arraydesc( A ), blksize ))
    I, J, data = sparse.find( A )  # row, col
    I //= blksize
    J //= blksize

        # average = sum / count --
    a = sparse.coo_matrix(( data, (I, J) ))
    N = sparse.coo_matrix(( np.ones_like( data ), (I, J) ))
    a = a.tocsr()  # coo: duplicate (i,j) entries will be summed !
    N = N.tocsr()
    N.data = (1 / blksize**2) / N.data  # np.divide: error, not owndata ?
    a = a.multiply( N )
    if verbose:
        print( "avblocks out:", arraydesc( a ))

    return a


if __name__ == "__main__":
    import sys
    from randomsparse import randomsparse

    np.set_printoptions( threshold=20, edgeitems=10, linewidth=120, formatter = dict(
                float = lambda x: "%.2g" % x ))  # float arrays %.2g
    print( 80 * "▄" )

    #...............................................................................
    n = 1e6  # 1e6 4 4: run -t 3 sec
    blksize = 3  # 19 3 == 21 3 with 0 fill
    avrow = 4

    # to change these params, run this.py  a=1  b=None  'c = expr' ... in sh or ipython --
    for arg in sys.argv[1:]:
        exec( arg )

    A = randomsparse( n, n, density = avrow / n ) * 100

    a = avblocks( A, blksize, verbose=1 )

    print( "A: \n", A.A )
    print( "avblocks * %d: \n%s" % (
            blksize**2, a.A * blksize**2 ))


## randomsparse.py
#!/usr/bin/env python
""" Fast random m x n matrix with ~ m * n * density non-zeros
    distrib = uniform normal laplace ...
        with numpy >= 1.17, much faster for large m*n than
        https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.random.html
"""
# eigenvalues: see https://en.wikipedia.org/wiki/Circular_law
# v sparse => all-0 cols, null space

from __future__ import division, print_function
import sys
import numpy as np
from scipy import sparse

__version__ = "2020-05-31 May"  # denis-bz-py t-online.de

#...........................................................................
def randomsparse( m, n, density=4, symmetric=False, posdef=False, incdiag=0,
            distrib="uniform", distribargs=dict( low=-1, high=1 ), seed=0,
            verbose=1 ):
    """ -> sparse m x n csc_matrix with ~ m * n * density non-zeros
        density > 1, e.g. 4: average 4 non-zeros per row, 4/n overall
        symmetric: A + A.T
        posdef: A * A.T
        incdiag: add incdiag * I: shifts eigenvalues, no all-0 rows or columns
        distrib = np.random uniform normal laplace ... with distribargs
            default uniform -1 .. 1
            for other distributions, just overwrite e.g. A.data = np.sign( A.data )
        seed: an int, see np.random.default_rng or old RandomState
    """
    m = int(m)
    n = int(n)
    if density > 1:  # e.g. 4: average 4 non-zeros per row
        density /= n
    if symmetric or posdef:
        density /= 2  # posdef ?
    k = int( round( m * n * density ))
        # numpy >= 1.17 has much faster .choice
    randomgen = np.random.default_rng( seed ) if hasattr( np.random, "default_rng" ) \
            else np.random.RandomState( seed )  # old numpy
    ind = randomgen.choice( m*n, size=k, replace=False )  # no dups, wikipedia Knuth shuffle
    i = ind // n
    j = ind % n
        # could tmg i, j -> triangular or symmetric
    distribf = getattr( randomgen, distrib )  # uniform normal ...
    vals = distribf( size=len(ind), **distribargs )

    A = sparse.coo_matrix( (vals, (i, j)), shape=(m, n) )  # may have dups, tocsc sums them !
    if posdef:
        A *= A.T  # density ?
    elif symmetric:
        A += A.T
    if incdiag != 0:
        A += incdiag * sparse.eye( A.shape[0] )
    A = A.tocsc()
    A.sort_indices()  # for umfpack
    if verbose:
        print( "randomsparse:", _desc( A, distrib, incdiag, seed ),
                "symmetric" if symmetric else "",
                "posdef" if posdef else "" )
        if verbose > 1:
            print( A[:20,:20].A )  # with caller's printoptions

    return A


def _desc( A, distrib, incdiag, seed ):
    m, n = A.shape
    return "%s  %.3g nnz/row  incdiag %.3g  distrib %s %.3g .. %.3g  seed %d " % (
            A.shape, A.nnz / n, incdiag, distrib, A.min(), A.max(), seed )
	#!/usr/bin/env python
	"""average e.g. 4 x 4 blocks in a sparse matrix A
	N x N -> N/4 x N/4, nnz roughly *= blksize
	why: reduce big matrices to plot, q+d approximate inverse

	Keywords: sparse-matrix, python, scipy, data-compression
	"""
	# pretty fast -- coo sums duplicate (i,j) entries

	from __future__ import division, print_function
	import sys
	import numpy as np
	from numpy.linalg import norm
	from scipy import sparse

	try:
	from zutil import arraydesc
	except ImportError:
	pass


	#...............................................................................
	def avblocks( A, blksize=4, verbose=1 ):
	"""average e.g. 4 x 4 blocks in sparse matrix A """
	assert sparse.issparse( A ), type(A).__name__
	if verbose:
	print( "\navblocks in: %s blksize %d " % (
	arraydesc( A ), blksize ))
	I, J, data = sparse.find( A ) # row, col
	I //= blksize
	J //= blksize

	# average = sum / count --
	a = sparse.coo_matrix(( data, (I, J) ))
	N = sparse.coo_matrix(( np.ones_like( data ), (I, J) ))
	a = a.tocsr() # coo: duplicate (i,j) entries will be summed !
	N = N.tocsr()
	N.data = (1 / blksize**2) / N.data # np.divide: error, not owndata ?
	a = a.multiply( N )
	if verbose:
	print( "avblocks out:", arraydesc( a ))

	return a


	if __name__ == "__main__":
	import sys
	from randomsparse import randomsparse

	np.set_printoptions( threshold=20, edgeitems=10, linewidth=120, formatter = dict(
	float = lambda x: "%.2g" % x )) # float arrays %.2g
	print( 80 * "▄" )

	#...............................................................................
	n = 1e6 # 1e6 4 4: run -t 3 sec
	blksize = 3 # 19 3 == 21 3 with 0 fill
	avrow = 4

	# to change these params, run this.py a=1 b=None 'c = expr' ... in sh or ipython --
	for arg in sys.argv[1:]:
	exec( arg )

	A = randomsparse( n, n, density = avrow / n ) * 100

	a = avblocks( A, blksize, verbose=1 )

	print( "A: \n", A.A )
	print( "avblocks * %d: \n%s" % (
	blksize*2, a.A blksize**2 ))
	#!/usr/bin/env python
	""" Fast random m x n matrix with ~ m * n * density non-zeros
	distrib = uniform normal laplace ...
	with numpy >= 1.17, much faster for large m*n than
	https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.random.html
	"""
	# eigenvalues: see https://en.wikipedia.org/wiki/Circular_law
	# v sparse => all-0 cols, null space

	from __future__ import division, print_function
	import sys
	import numpy as np
	from scipy import sparse

	__version__ = "2020-05-31 May" # denis-bz-py t-online.de

	#...........................................................................
	def randomsparse( m, n, density=4, symmetric=False, posdef=False, incdiag=0,
	distrib="uniform", distribargs=dict( low=-1, high=1 ), seed=0,
	verbose=1 ):
	""" -> sparse m x n csc_matrix with ~ m * n * density non-zeros
	density > 1, e.g. 4: average 4 non-zeros per row, 4/n overall
	symmetric: A + A.T
	posdef: A * A.T
	incdiag: add incdiag * I: shifts eigenvalues, no all-0 rows or columns
	distrib = np.random uniform normal laplace ... with distribargs
	default uniform -1 .. 1
	for other distributions, just overwrite e.g. A.data = np.sign( A.data )
	seed: an int, see np.random.default_rng or old RandomState
	"""
	m = int(m)
	n = int(n)
	if density > 1: # e.g. 4: average 4 non-zeros per row
	density /= n
	if symmetric or posdef:
	density /= 2 # posdef ?
	k = int( round( m * n * density ))
	# numpy >= 1.17 has much faster .choice
	randomgen = np.random.default_rng( seed ) if hasattr( np.random, "default_rng" ) \
	else np.random.RandomState( seed ) # old numpy
	ind = randomgen.choice( m*n, size=k, replace=False ) # no dups, wikipedia Knuth shuffle
	i = ind // n
	j = ind % n
	# could tmg i, j -> triangular or symmetric
	distribf = getattr( randomgen, distrib ) # uniform normal ...
	vals = distribf( size=len(ind), **distribargs )

	A = sparse.coo_matrix( (vals, (i, j)), shape=(m, n) ) # may have dups, tocsc sums them !
	if posdef:
	A *= A.T # density ?
	elif symmetric:
	A += A.T
	if incdiag != 0:
	A += incdiag * sparse.eye( A.shape[0] )
	A = A.tocsc()
	A.sort_indices() # for umfpack
	if verbose:
	print( "randomsparse:", _desc( A, distrib, incdiag, seed ),
	"symmetric" if symmetric else "",
	"posdef" if posdef else "" )
	if verbose > 1:
	print( A[:20,:20].A ) # with caller's printoptions

	return A


	def _desc( A, distrib, incdiag, seed ):
	m, n = A.shape
	return "%s %.3g nnz/row incdiag %.3g distrib %s %.3g .. %.3g seed %d " % (
	A.shape, A.nnz / n, incdiag, distrib, A.min(), A.max(), seed )