Skip to content

Instantly share code, notes, and snippets.

@pervognsen
Last active July 26, 2020 05:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pervognsen/a0e1a026c387352107c072b029f35bf7 to your computer and use it in GitHub Desktop.
Save pervognsen/a0e1a026c387352107c072b029f35bf7 to your computer and use it in GitHub Desktop.
# Version 0: Use slice views to access a matrix like a block matrix.
def blocked_matrix_multiply0(A, B, block_size, panel_size):
m, p, n = A.shape[0], A.shape[1], B.shape[1]
w, h = block_size, panel_size
C = np.zeros((m, n))
for i in range(0, m, w):
for j in range(0, p, h):
row_panel = A[i:i+w, j:j+h]
for k in range(0, n, w):
column_panel = B[j:j+h, k:k+w]
C[i:i+w, k:k+w] += row_panel @ column_panel
return C
# Version 1: Replace slicing with zero-copy block matrix view.
def blocks(A, block_shape):
assert all(m % n == 0 for m, n in zip(A.shape, block_shape))
# Shifts is from https://gist.github.com/pervognsen/0dbe377a146fec65d3d09c07db40e53b#file-tricks-py-L17
return shifts(A, block_shape, block_shape)
def blocked_matrix_multiply1(A, B, block_size, panel_size):
m, p, n = A.shape[0], A.shape[1], B.shape[1]
w, h = block_size, panel_size
out = np.zeros((m, n))
A = blocks(A, (w, h))
B = blocks(B, (h, w))
C = blocks(out, (w, w))
for i in range(A.shape[0]):
for j in range(A.shape[1]):
row_panel = A[i, j]
for k in range(B.shape[1]):
column_panel = B[j, k]
C[i, k] += row_panel @ column_panel
return out
# Version 2: Repack row/column panels into contiguous row-major/column-major order.
def blocked_matrix_multiply2(A, B, block_size, panel_size):
m, p, n = A.shape[0], A.shape[1], B.shape[1]
w, h = block_size, panel_size
out = np.zeros((m, n))
A = blocks(A, (w, h))
B = blocks(B, (h, w))
C = blocks(out, (w, w))
for i in range(A.shape[0]):
for j in range(A.shape[1]):
row_panel = A[i, j].copy('C')
for k in range(B.shape[1]):
column_panel = B[j, k].copy('F')
C[i, k] += row_panel @ column_panel
return out
# Version 3: Transpose loop order to prioritize the column-major repacking.
def blocked_matrix_multiply3(A, B, block_size, panel_size):
m, p, n = A.shape[0], A.shape[1], B.shape[1]
w, h = block_size, panel_size
out = np.zeros((m, n))
A = blocks(A, (w, h))
B = blocks(B, (h, w))
C = blocks(out, (w, w))
for j in range(B.shape[0]):
for k in range(B.shape[1]):
column_panel = B[j, k].copy('F')
for i in range(A.shape[0]):
row_panel = A[i, j].copy('C')
C[i, k] += row_panel @ column_panel
return out
# Version 4: Split up the microkernel and macrokernel for reusability and decoupling.
def matrix_multiply_microkernel(C, A, B):
C += A @ B
def blocked_macrokernel1(A, B, block_size, panel_size, microkernel):
m, p, n = A.shape[0], A.shape[1], B.shape[1]
w, h = block_size, panel_size
out = np.zeros((m, n))
A = blocks(A, (w, h))
B = blocks(B, (h, w))
C = blocks(out, (w, w))
for j in range(B.shape[0]):
for k in range(B.shape[1]):
column_panel = B[j, k].copy('F')
for i in range(A.shape[0]):
row_panel = A[i, j].copy('C')
microkernel(C[i, k], row_panel, column_panel)
return out
def blocked_matrix_multiply4(A, B, block_size, panel_size):
return blocked_macrokernel1(A, B, block_size, panel_size, matrix_multiply_microkernel)
# Version 5: In-place buffering, optional out parameter.
def blocked_macrokernel2(A, B, block_size, panel_size, microkernel, out=None):
m, p, n = A.shape[0], A.shape[1], B.shape[1]
w, h = block_size, panel_size
out = np.zeros((m, n)) if out is None else out
row_panel = np.empty((w, h), order='C')
column_panel = np.empty((h, w), order='F')
A = blocks(A, (w, h))
B = blocks(B, (h, w))
C = blocks(out, (w, w))
for j in range(B.shape[0]):
for k in range(B.shape[1]):
np.copyto(column_panel, B[j, k])
for i in range(A.shape[0]):
np.copyto(row_panel, A[i, j])
microkernel(C[i, k], row_panel, column_panel)
return out
def blocked_matrix_multiply5(A, B, block_size, panel_size):
return blocked_macrokernel2(A, B, block_size, panel_size, matrix_multiply_microkernel)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment