Skip to content

Instantly share code, notes, and snippets.

@mapa17
Created July 10, 2018 16:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mapa17/94d91c3297930e8123a439ea0c5ef349 to your computer and use it in GitHub Desktop.
Save mapa17/94d91c3297930e8123a439ea0c5ef349 to your computer and use it in GitHub Desktop.
# Benchmarks solutions to the Stack Overflow question below (sum matrix elements grouped by an index matrix):
# https://stackoverflow.com/questions/51268386/sum-matrix-elements-group-by-indices-in-python
import numpy as np
# Small worked example: A holds the values, B holds the group label of the
# value at the same position.
A = np.array([
    [0.52, 0.25, -0.45, 0.13],
    [-0.14, -0.41, 0.31, -0.41],
])
B = np.array([
    [1, 3, 1, 2],
    [3, 0, 2, 2],
])

# Benchmark inputs: the small matrices repeated 200 times along both axes.
bigA = np.tile(A, reps=(200, 200))
bigB = np.tile(B, reps=(200, 200))

# Per-label sums of A grouped by B, rounded to two decimals (label -> sum).
RESULT = {
    0: -0.41,
    1: 0.07,
    2: 0.03,
    3: 0.11,
}
def matrix_sum_by_indices(indices, matrix):
    """Sum the entries of ``matrix`` grouped by the label found in ``indices``.

    Both inputs are flattened in row-major order, so they may have any
    dimensionality as long as they hold the same number of elements.

    Args:
        indices: Array-like of group labels, one per element of ``matrix``.
        matrix: Array-like of values to accumulate.

    Returns:
        A list with one sum per *distinct* label, ordered by ascending label.
        Labels that never occur get no entry, so position ``k`` corresponds
        to label ``k`` only when the labels are ``0..n-1`` without gaps.
    """
    labels = np.ravel(indices)
    values = np.ravel(matrix)

    # Sorting by label turns every group into one contiguous run of values.
    order = labels.argsort()
    # A new run starts wherever the sorted label increases.
    boundaries = np.flatnonzero(np.diff(labels[order]) > 0) + 1
    groups = np.split(values[order], boundaries)

    # ndarray.sum() reduces in native code; the builtin sum() would walk the
    # elements one by one in Python.
    return [group.sum() for group in groups]
def wrapper_matrix_sum_by_indices(packed):
    """Single-argument adapter around ``matrix_sum_by_indices``.

    Args:
        packed: Sequence whose first item is the index array and whose second
            item is the value array.

    Returns:
        Whatever ``matrix_sum_by_indices`` returns for that pair.
    """
    indices = packed[0]
    matrix = packed[1]
    return matrix_sum_by_indices(indices, matrix)
# Sanity check of the single-process implementation on the benchmark input.
# Guarded so that worker processes which re-import this script (the "spawn"
# start method does that) do not repeat the computation and the output.
if __name__ == "__main__":
    print('matrix_sum_by_indices results ...')
    print(matrix_sum_by_indices(bigB, bigA))
def using_bincount(indices, matrx):
    """Sum the entries of ``matrx`` grouped by the label found in ``indices``.

    Args:
        indices: Array-like of non-negative integer labels, one per element
            of ``matrx``.
        matrx: Array-like of values to accumulate.

    Returns:
        1-D float array of length ``indices.max() + 1`` whose position ``k``
        holds the sum of all values labelled ``k`` (0.0 for unused labels).
    """
    # np.bincount only accepts 1-D input, so flatten both arrays. A single
    # weighted bincount over everything already gives the per-label totals;
    # offsetting the labels per row and reshaping afterwards is unnecessary
    # and breaks whenever the last row does not contain the largest label.
    labels = np.ravel(indices)
    weights = np.ravel(matrx)
    return np.bincount(labels, weights=weights)
def wrapper_using_bincounts(packed):
    """Single-argument adapter around ``using_bincount``.

    Args:
        packed: Sequence whose first item is the index array and whose second
            item is the value array.

    Returns:
        Whatever ``using_bincount`` returns for that pair.
    """
    indices = packed[0]
    matrix = packed[1]
    return using_bincount(indices, matrix)
### Multiprocessing solution
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
from functools import partial
def fast_matrix_multi_with_executor(func, indices, matrix, executor, ncpu=None):
    """Compute grouped sums in parallel by splitting the inputs row-wise.

    ``indices`` and ``matrix`` are cut into matching blocks along axis 0,
    ``func`` is mapped over the blocks through ``executor`` and the per-block
    results are added together.

    Args:
        func: Callable taking one ``(indices_block, matrix_block)`` tuple and
            returning that block's sums. The combined result is only
            meaningful when position ``k`` of the return value refers to the
            same label in every block (as with a bincount-style result).
        indices: Array of group labels, split along axis 0.
        matrix: Array of values, split the same way as ``indices``.
        executor: Object providing an ``Executor``-style ``map`` method. It
            is not shut down here, so it can be reused across calls.
        ncpu: Number of blocks to create. Defaults to
            ``multiprocessing.cpu_count()``.

    Returns:
        Array holding the element-wise sum of all per-block results.
    """
    ncpu = ncpu or multiprocessing.cpu_count()

    # np.array_split returns empty blocks when asked for more pieces than
    # there are rows, and reductions such as ``indices.max()`` raise on an
    # empty block, so never create more blocks than rows.
    n_blocks = max(1, min(ncpu, len(indices)))
    index_blocks = np.array_split(indices, n_blocks)
    matrix_blocks = np.array_split(matrix, n_blocks)

    block_sums = [
        np.asarray(result)
        for result in executor.map(func, zip(index_blocks, matrix_blocks))
    ]

    # Usual case: every block saw the same label range.
    if len({result.shape for result in block_sums}) == 1:
        return np.sum(block_sums, axis=0)

    # A block that misses the largest labels returns a shorter vector.
    # Missing trailing labels contribute nothing, so pad with zeros instead
    # of letting np.sum fail on a ragged list.
    width = max(result.size for result in block_sums)
    total = np.zeros(width, dtype=np.result_type(*block_sums))
    for result in block_sums:
        flat = result.ravel()
        total[:flat.size] += flat
    return total
# Worker processes must be able to import this script without side effects:
# with the "spawn" start method each worker re-imports the main module, and
# an unguarded pool creation would then run again inside every worker.
if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=None) as executor:
        print('fast_matrix_multi_with_executor results ...')
        print(fast_matrix_multi_with_executor(wrapper_using_bincounts, bigB, bigA, executor))
### TIME execution
import timeit
# Timing section. Guarded because it creates process pools: with the "spawn"
# start method every worker re-imports this script, and unguarded pool
# creation (and the benchmarks themselves) would run again in each worker.
if __name__ == "__main__":
    # Baseline: single process.
    print('matrix_sum_by_indices runtime ...')
    print(timeit.timeit('matrix_sum_by_indices(bigB, bigA)', number=200, globals=globals()))

    # One pool is created per benchmark and reused for all 200 runs, so pool
    # start-up cost is not part of the measured time.
    print('fast_matrix_multi with persistent executors and matrix_sum_by_indices runtime...')
    with ProcessPoolExecutor(max_workers=None) as executor:
        print(timeit.timeit('fast_matrix_multi_with_executor(wrapper_matrix_sum_by_indices, bigB, bigA, executor)', number=200, globals=globals()))

    print('fast_matrix_multi with persistent executors and wrapper_using_bincounts runtime...')
    with ProcessPoolExecutor(max_workers=None) as executor:
        print(timeit.timeit('fast_matrix_multi_with_executor(wrapper_using_bincounts, bigB, bigA, executor)', number=200, globals=globals()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment