|
""" |
|
Sparse matrices don't broadcast, but this awesome solution was on SO: |
|
https://stackoverflow.com/a/64920528/1519199 |
|
|
|
I added commentary and switched the logic from pairwise max to pairwise min |
|
""" |
|
|
|
import numba as nb |
|
import numpy as np |
|
from scipy import sparse |
|
|
|
def sparse_elementwise_minimum(mat, vec, shrink_to_fit=True): |
|
""" pairwise minimum between vec and each row of `mat` |
|
""" |
|
mat_csr = mat.tocsr() |
|
vec_csr = vec.tocsr() |
|
|
|
shape_mat = mat_csr.shape |
|
indices_mat = mat_csr.indices |
|
indptr_mat = mat_csr.indptr |
|
data_mat = mat_csr.data |
|
indices_vec = vec_csr.indices |
|
data_vec = vec_csr.data |
|
|
|
res = sparse_elementwise_minimum_nb( |
|
indices_mat, |
|
indptr_mat, |
|
data_mat, |
|
shape_mat, |
|
indices_vec, |
|
data_vec, |
|
shrink_to_fit, |
|
) |
|
|
|
@nb.njit(cache=True) |
|
def sparse_elementwise_minimum_nb( |
|
indices_mat, |
|
indptr_mat, |
|
data_mat, |
|
shape_mat, |
|
vec_row_ind, |
|
vec_row_data, |
|
shrink_to_fit, |
|
): |
|
# the maximum number non-zero elements |
|
max_non_zero = shape_mat[0] * vec_row_data.shape[0] + data_mat.shape[0] |
|
# output data |
|
data_res = np.empty(max_non_zero, dtype=data_mat.dtype) |
|
# output (row) indexes |
|
indices_res = np.empty(max_non_zero, dtype=np.int32) |
|
# pointer to the current data / row index |
|
data_res_p = 0 |
|
|
|
# output pointer (row slices) (always has length of nrow + 1) |
|
indptr_mat_res = np.empty((shape_mat[0] + 1), dtype=np.int32) |
|
indptr_mat_res[0] = 0 |
|
indptr_mat_res_p = 1 |
|
indptr_mat_ = 0 |
|
|
|
# for each row in the matrix |
|
for row_idx in range(shape_mat[0]): |
|
mat_row_ind = indices_mat[indptr_mat[row_idx] : indptr_mat[row_idx + 1]] |
|
mat_row_data = data_mat[indptr_mat[row_idx] : indptr_mat[row_idx + 1]] |
|
|
|
# for each matrix / vec value in the row (with the usual lazy pointer incrementation trick) |
|
mat_ptr = 0 |
|
vec_ptr = 0 |
|
while mat_ptr < mat_row_ind.shape[0] and vec_ptr < vec_row_ind.shape[0]: |
|
ind_mat = mat_row_ind[mat_ptr] |
|
ind_vec = vec_row_ind[vec_ptr] |
|
|
|
# value for both matrix and vector is present |
|
if ind_mat == ind_vec: |
|
data_res[data_res_p] = min( |
|
mat_row_data[mat_ptr], vec_row_data[vec_ptr] |
|
) # do the math |
|
indices_res[data_res_p] = ind_mat # set the output column index |
|
data_res_p += 1 # increement the output data pointer |
|
mat_ptr += 1 # increment the mattrix row pointer |
|
vec_ptr += 1 # increment the vector row pointer |
|
indptr_mat_ += 1 # increment the output slice pointer |
|
|
|
# only value for the matrix is present; vector is 0 by definition |
|
elif ind_mat < ind_vec: |
|
if mat_row_data[mat_ptr] < 0: |
|
data_res[data_res_p] = mat_row_data[mat_ptr] # do the math |
|
indices_res[data_res_p] = ind_mat # set the output column index |
|
data_res_p += 1 # increement the output data pointer |
|
indptr_mat_ += 1 # increment the output slice pointer |
|
mat_ptr += 1 # increment the mattrix row pointer |
|
|
|
# only value for the vector is present; matrix is 0 by definition |
|
else: |
|
if vec_row_data[vec_ptr] < 0: |
|
data_res[data_res_p] = vec_row_data[vec_ptr] # do the math |
|
indices_res[data_res_p] = ind_vec # set the output column index |
|
data_res_p += 1 # increement the output data pointer |
|
indptr_mat_ += 1 # increment the output slice pointer |
|
vec_ptr += 1 # increment the vector row pointer |
|
|
|
# At this point mat_ptr or vec_ptr may have reached the end of the row, |
|
# but the *other* one may not have, so finish up with the other one if necessary. |
|
# Aside: Two coins add up to 30 cents and one of them isn't a nickel... (the other one is) |
|
|
|
for i in range(mat_ptr, mat_row_ind.shape[0]): |
|
if mat_row_data[i] < 0: |
|
data_res[data_res_p] = mat_row_data[i] |
|
indices_res[data_res_p] = mat_row_ind[i] |
|
data_res_p += 1 # increement the output data pointer |
|
indptr_mat_ += 1 # increment the output slice pointer |
|
for i in range(vec_ptr, vec_row_ind.shape[0]): |
|
if vec_row_data[i] < 0: |
|
data_res[data_res_p] = vec_row_data[i] |
|
indices_res[data_res_p] = vec_row_ind[i] |
|
data_res_p += 1 # increement the output data pointer |
|
indptr_mat_ += 1 # increment the output slice pointer |
|
indptr_mat_res[indptr_mat_res_p] = indptr_mat_ |
|
indptr_mat_res_p += 1 |
|
|
|
if shrink_to_fit == True: |
|
data_res = np.copy(data_res[:data_res_p]) |
|
indices_res = np.copy(indices_res[:data_res_p]) |
|
else: |
|
data_res = data_res[:data_res_p] |
|
indices_res = indices_res[:data_res_p] |
|
|
|
return data_res, indices_res, indptr_mat_res |
|
|