Skip to content

Instantly share code, notes, and snippets.

@toobaz
Last active February 28, 2024 14:36
Show Gist options
  • Save toobaz/e40ecfcf0e77ba158d8ac60a39faeaf9 to your computer and use it in GitHub Desktop.
import numpy as np
import scipy.sparse
def _alt_lexsort(arr, kind='stable'):
"""
Optimized and more flexible (allows to choose sorting algorithm) version of
np.lexsort.
Assumes positive elements such that the product of the maximum values in
each layer of arr, multiplied by the length of each layer
(i.e., arr[0].shape[0]), plus some margin for roundings, is lower than
maxint.
Only tested with integers.
"""
n_layers = len(arr)
bits_for_layers = [int(np.ceil(np.log2(max(arr[i].max(), 1))))
for i in range(n_layers-1)] + [0]
# Go backwards for consistency with np.lexsort
prod = np.bitwise_or.reduce([arr[i] << bits_for_layers[n_layers - i-1]
for i in range(n_layers)])
return np.argsort(prod, kind=kind)
def coo_get_block(mat, rows, columns):
    """
    Fancy-indexed submatrix getter.

    Parameters
    ----------
    mat : scipy.sparse COO matrix/array.
    rows, columns : 1-D sequences of (possibly negative) indices, without
        repetitions.

    Returns
    -------
    scipy.sparse.coo_array of shape (len(rows), len(columns)) holding the
    selected block of ``mat``.

    Raises
    ------
    NotImplementedError
        If ``rows`` or ``columns`` contain repeated indices.
    """
    # Copy so the negative-index translation below cannot mutate arrays
    # owned by the caller (np.asarray would alias an ndarray argument):
    rows = np.array(rows, copy=True)
    columns = np.array(columns, copy=True)
    if (len(rows) > len(np.unique(rows))
            or len(columns) > len(np.unique(columns))):
        raise NotImplementedError("Repeated rows or columns are unsupported")
    new_shape = len(rows), len(columns)
    # Translation of negative indices:
    rows[rows < 0] += mat.shape[0]
    columns[columns < 0] += mat.shape[1]
    # Position of each original row/column in the new matrix (-1 = absent):
    rows_pos = -np.ones((mat.shape[0],), dtype=int)
    rows_pos[rows] = np.arange(new_shape[0], dtype=int)
    cols_pos = -np.ones((mat.shape[1],), dtype=int)
    cols_pos[columns] = np.arange(new_shape[1], dtype=int)
    # Keep only the stored entries falling inside the selected block:
    is_sel = np.isin(mat.row, rows) & np.isin(mat.col, columns)
    data, row, col = (arr[is_sel] for arr in (mat.data, mat.row, mat.col))
    return scipy.sparse.coo_array((data, (rows_pos[row], cols_pos[col])),
                                  shape=new_shape, dtype=mat.dtype)
def coo_set_block(mat, other, rows, columns):
    """
    Fancy-indexed submatrix setter.

    Overwrites, in place, the block of ``mat`` addressed by
    ``rows`` x ``columns`` with the content of ``other``; any existing
    entries of ``mat`` inside the block are discarded first.

    Parameters
    ----------
    mat : scipy.sparse COO matrix/array, modified in place.
    other : array-like or sparse, of shape (len(rows), len(columns)).
    rows, columns : 1-D sequences of (possibly negative) indices.

    Raises
    ------
    ValueError
        If ``other``'s shape does not match the addressed block.
    """
    # Copy so the negative-index translation below cannot mutate arrays
    # owned by the caller (np.asarray would alias an ndarray argument):
    rows = np.array(rows, copy=True)
    columns = np.array(columns, copy=True)
    # Public API check instead of the private scipy.sparse._coo._coo_base,
    # which can move or disappear between scipy releases:
    if not (scipy.sparse.issparse(other) and other.format == 'coo'):
        other = scipy.sparse.coo_array(other)
    new_shape = len(rows), len(columns)
    if other.shape != new_shape:
        raise ValueError(f"other has shape {other.shape}, "
                         f"expected {new_shape}")
    # Translation of negative indices:
    rows[rows < 0] += mat.shape[0]
    columns[columns < 0] += mat.shape[1]
    # Entries of mat outside the addressed block are kept:
    keep_mask = ~np.isin(mat.row, rows) | ~np.isin(mat.col, columns)
    kept_row, kept_col, kept_data = (arr[keep_mask]
                                     for arr in (mat.row, mat.col, mat.data))
    # Map the block's local coordinates back into mat's coordinates:
    new_row = np.concatenate([kept_row, rows[other.row]])
    new_col = np.concatenate([kept_col, columns[other.col]])
    new_data = np.concatenate([kept_data, other.data])
    # Keep entries ordered (by column, then row):
    order = np.lexsort((new_row, new_col))
    mat.data = new_data[order]
    mat.col = new_col[order]
    mat.row = new_row[order]
OP_ATTRS = {'sum': 'add', 'sub': 'sub', 'div': 'truediv', 'mul': 'mul'}


def coo_binary_op(mat, other, op='sum', sorter=None):
    """
    Binary operation between COO sparse matrices.

    Only supports operations such that op(0, 0) = 0, hence preserving
    sparsity (e.g. not np.power).  ``mat`` is always the left operand:
    the result holds op(mat, other), with missing entries treated as 0.

    Note that sum could be fastpathed (relying on .sum_duplicates()).

    Parameters
    ----------
    mat, other : scipy.sparse COO matrices/arrays of the same shape.
        Canonicalized in place via .sum_duplicates().
    op : str
        A key of OP_ATTRS, or directly a dunder name such as 'add'.
    sorter : callable or None
        np.lexsort-compatible callable used to order the concatenated
        coordinates; defaults to _alt_lexsort, which better exploits the
        partial sortedness of the data.

    Returns
    -------
    scipy.sparse.coo_array with the shape of ``mat``.
    """
    if sorter is None:
        sorter = _alt_lexsort
    # We need canonical format (we don't necessarily care about order -
    # except for performance - but we do care about unicity):
    mat.sum_duplicates()
    other.sum_duplicates()
    all_row = np.concatenate([mat.row, other.row])
    all_col = np.concatenate([mat.col, other.col])
    all_data = np.concatenate([mat.data, other.data])
    # 0 marks entries coming from mat, 1 entries coming from other:
    all_ind = np.concatenate([np.zeros(len(mat.data), dtype=int),
                              np.ones(len(other.data), dtype=int)])
    # Sort by (row, col, ind): entries sharing a cell become adjacent,
    # with the mat entry first.
    order = sorter((all_ind, all_col, all_row))
    ord_row, ord_col, ord_data, ord_ind = (arr[order]
                                           for arr in (all_row, all_col,
                                                       all_data, all_ind))
    # common_cells[i] (padded): entry i shares its cell with entry i-1.
    common_cells = np.zeros(len(ord_data) + 1, dtype=bool)
    common_cells[1:-1] = ((ord_row[1:] == ord_row[:-1]) &
                          (ord_col[1:] == ord_col[:-1]))
    # Last element cannot possibly be a duplicate with indicator 0
    only_mat = (ord_ind == 0) & ~common_cells[1:]
    # First element cannot possibly be a duplicate with indicator 1
    only_other = (ord_ind == 1) & ~common_cells[:-1]
    op = OP_ATTRS.get(op, op)
    if np.any(common_cells):
        # For each cell present in both operands, combine in place into
        # the first (mat) entry; the second (other) entry is dropped
        # below.  mat must be the LEFT operand so that non-commutative
        # ops (sub, div) are correct - the original code applied
        # op(other, mat), i.e. swapped operands, for these cells.
        mat_vals = ord_data[common_cells[1:]]
        other_vals = ord_data[common_cells[:-1]]
        ord_data[common_cells[1:]] = getattr(mat_vals, f'__{op}__')(other_vals)
    # Cells present in a single operand are combined with 0.
    # NOTE(review): for 'div' on integer data the float result is
    # truncated by these in-place assignments - confirm intended.
    ord_data[only_mat] = getattr(ord_data[only_mat], f'__{op}__')(0)
    ord_data[only_other] = getattr(ord_data[only_other], f'__r{op}__')(0)
    # Now "squeeze" by removing duplicate indices:
    keep = ~common_cells[:-1]
    return scipy.sparse.coo_array((ord_data[keep],
                                   (ord_row[keep], ord_col[keep])),
                                  shape=mat.shape, dtype=ord_data.dtype)
def test():
    """
    End-to-end check: extract a block of a large sparse matrix, add a
    second matrix to it, write the block back, and verify both the grand
    total and one specific cell.
    """
    # Local seeded generator so failures are reproducible (the original
    # drew from the unseeded global state):
    rng = np.random.RandomState(0)
    # 7.28 TiB in dense representation:
    m1 = scipy.sparse.lil_array((10**6, 10**6), dtype=int)
    # 1.82 TiB in dense representation:
    m2 = scipy.sparse.lil_array((5*10**5, 5*10**5), dtype=int)
    coords_1 = rng.randint(0, 10**6, (10**3, 2))
    coords_2 = rng.randint(0, 10**5, (10**3, 2))
    for idx, (x, y) in enumerate(coords_1):
        m1[x, y] = idx
    for idx, (x, y) in enumerate(coords_2):
        m2[x, y] = idx
    # Two cells that will actually add up below:
    m1[1*10**5 + 10, 2*10**5 + 10] = m2[10, 10] = -1
    m1 = scipy.sparse.coo_array(m1)
    m2 = scipy.sparse.coo_array(m2)
    rows = np.concatenate([np.arange(1*10**5, 3*10**5),
                           np.arange(4*10**5, 7*10**5)])
    cols = np.concatenate([np.arange(2*10**5, 5*10**5),
                           np.arange(7*10**5, 9*10**5)])
    extracted = coo_get_block(m1, rows, cols)
    added = coo_binary_op(extracted, m2)
    coo_set_block(m1, added, rows, cols)
    # Sum from 0 to 999, times 2, minus 2 manually set:
    total = m1.sum()
    assert total == 998998, total
    the_element = scipy.sparse.csr_array(m1)[1*10**5 + 10, 2*10**5 + 10]
    assert the_element == -2, the_element


if __name__ == '__main__':
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment