# ntjess/conv2dimpls.py

## conv2dimpls.py
import numpy as np
import torch
from torch.nn.functional import conv2d
from skimage.util.shape import view_as_windows

# Sample data chosen without transpose symmetry, so that accidentally
# transposing the input or the kernel changes the result instead of going
# unnoticed.
input_matrix_ = np.array(
    [
        [3, 9, 0, 1],
        [2, 8, 1, 1],
        [3, 4, 8, 1],
        [2, 2, 2, 2],
    ],
    dtype=np.float32,
)
kernel_ = np.array(
    [
        [8, 9],
        [4, 4],
    ],
    dtype=np.float32,
)
bias_ = np.array([0.06], dtype=np.float32)


# Convenience converters. They are real functions rather than lambdas bound to
# names so that they carry a proper __name__ in tracebacks and can hold a
# docstring.
def to_t(arr):
    """Return ``arr`` wrapped in a ``torch.Tensor``."""
    return torch.Tensor(arr)


def to_np(arr):
    """Return ``arr`` converted to a NumPy array via ``np.array``."""
    return np.array(arr)

def conv2d_naive(input_matrix, kernel, bias):
    """Apply ``kernel`` to ``input_matrix`` with two explicit Python loops.

    The kernel is slid (unflipped, stride 1, no padding) over every position
    where it fits entirely inside the input. Each output element is the sum of
    the elementwise product of the covered patch with the kernel, and ``bias``
    is added to the whole result at the end.

    Args:
        input_matrix: Array whose first two axes are the spatial dimensions.
        kernel: Array whose first two axes give the window height and width.
        bias: Value broadcast-added to every output element.

    Returns:
        Array with spatial shape ``input_matrix.shape[:2] - kernel.shape[:2]
        + 1``. Sums are stored in a float32 buffer regardless of input dtype.

    Raises:
        ValueError: If the kernel does not fit inside the input along either
            of the first two axes.
    """
    kernel_shape = np.array(kernel.shape[:2])
    out_shape = np.array(input_matrix.shape[:2]) - kernel_shape + 1
    # A kernel exactly one element larger than the input gives a zero-sized
    # output shape, which np.empty would accept silently. Reject every
    # non-fitting kernel up front instead of returning an empty array.
    if min(out_shape) < 1:
        raise ValueError(
            f"kernel of shape {kernel.shape[:2]} does not fit inside "
            f"input of shape {input_matrix.shape[:2]}"
        )
    out = np.empty(out_shape, dtype=np.float32)
    # range() stops before its end value, so iterating over the output shape
    # visits exactly the top-left corners at which the kernel fits.
    for row in range(out_shape[0]):
        bottom = row + kernel_shape[0]
        for col in range(out_shape[1]):
            right = col + kernel_shape[1]
            patch = input_matrix[row:bottom, col:right]
            out[row, col] = np.sum(patch * kernel)
    return out + bias

def conv2d_im2col(input_matrix, kernel, bias):
    """Apply ``kernel`` to ``input_matrix`` through one matrix-vector product.

    Every kernel-sized patch of the input is flattened into one row of a
    matrix (the "im2col" layout). Multiplying that matrix by the flattened
    kernel yields all output values at once; ``bias`` is then added and the
    flat result is reshaped to two dimensions.

    Args:
        input_matrix: Array whose first two axes are the spatial dimensions.
        kernel: Array whose first two axes give the window height and width.
        bias: Value broadcast-added to every output element.

    Returns:
        Array of shape ``input_matrix.shape[:2] - kernel.shape[:2] + 1``.
    """
    window = np.array(kernel.shape[:2])
    grid = np.array(input_matrix.shape[:2]) - window + 1
    # One flattened patch per output position, gathered in row-major order so
    # the final reshape puts every value back at its (top, left) position.
    # The patches are stacked as rows and deliberately NOT transposed: that
    # only lines up by coincidence for particular shapes.
    patch_rows = np.array([
        input_matrix[top:top + window[0], left:left + window[1]].flatten()
        for top in range(grid[0])
        for left in range(grid[1])
    ])
    weights = kernel.flatten()
    flat_out = patch_rows @ weights + bias
    return flat_out.reshape(grid)

def conv2d_strided(input_matrix, kernel, bias):
    """Apply ``kernel`` to ``input_matrix`` using ``view_as_windows``.

    ``view_as_windows`` supplies every kernel-sized window of the input
    without a Python loop. The windows are reshaped so that each one becomes a
    row, multiplied by the flattened kernel, offset by ``bias``, and reshaped
    to two dimensions.

    Args:
        input_matrix: Array whose first two axes are the spatial dimensions.
        kernel: Array whose first two axes give the window height and width.
        bias: Value broadcast-added to every output element.

    Returns:
        Array of shape ``input_matrix.shape[:2] - kernel.shape[:2] + 1``.
    """
    window = np.array(kernel.shape[:2])
    grid = np.array(input_matrix.shape[:2]) - window + 1
    windows = view_as_windows(input_matrix, window)
    # Collapse each window into a single row of length height * width.
    patch_rows = windows.reshape(-1, np.prod(window))
    flat_out = patch_rows @ kernel.flatten() + bias
    return flat_out.reshape(grid)

def conv2d_pytorch(input_matrix, kernel, bias):
    """Apply ``kernel`` to ``input_matrix`` with ``torch.nn.functional.conv2d``.

    Args:
        input_matrix: 2-D array-like input.
        kernel: 2-D array-like kernel.
        bias: Array-like bias passed to ``conv2d``.

    Returns:
        The tensor produced by ``conv2d``. The two leading singleton axes
        added here are NOT removed; callers index them away themselves.
    """
    # Two unsqueeze(0) calls prepend two singleton axes, turning each 2-D
    # array into the 4-D (batch, channel, height, width) layout of conv2d.
    batched_input = to_t(input_matrix).unsqueeze(0).unsqueeze(0)
    batched_kernel = to_t(kernel).unsqueeze(0).unsqueeze(0)
    return conv2d(batched_input, batched_kernel, to_t(bias))

# Show the three NumPy implementations one after another so their outputs can
# be compared by eye.
print(
    conv2d_im2col(input_matrix_, kernel_, bias_),
    conv2d_naive(input_matrix_, kernel_, bias_),
    conv2d_strided(input_matrix_, kernel_, bias_),
    sep="\n",
)
# The PyTorch result still carries the two leading axes added by unsqueeze;
# index them away and convert to NumPy so it prints like the outputs above.
print(to_np(conv2d_pytorch(input_matrix_, kernel_, bias_))[0, 0].round(2))
	import numpy as np
	import torch
	from torch.nn.functional import conv2d
	from skimage.util.shape import view_as_windows

	# Sample data chosen without transpose symmetry, so that accidentally
	# transposing the input or the kernel changes the result instead of going
	# unnoticed.
	input_matrix_ = np.array(
	    [
	        [3, 9, 0, 1],
	        [2, 8, 1, 1],
	        [3, 4, 8, 1],
	        [2, 2, 2, 2],
	    ],
	    dtype=np.float32,
	)
	kernel_ = np.array(
	    [
	        [8, 9],
	        [4, 4],
	    ],
	    dtype=np.float32,
	)
	bias_ = np.array([0.06], dtype=np.float32)


	# Convenience converters. They are real functions rather than lambdas bound
	# to names so that they carry a proper __name__ in tracebacks and can hold
	# a docstring.
	def to_t(arr):
	    """Return ``arr`` wrapped in a ``torch.Tensor``."""
	    return torch.Tensor(arr)


	def to_np(arr):
	    """Return ``arr`` converted to a NumPy array via ``np.array``."""
	    return np.array(arr)

	def conv2d_naive(input_matrix, kernel, bias):
	    """Apply ``kernel`` to ``input_matrix`` with two explicit Python loops.

	    The kernel is slid (unflipped, stride 1, no padding) over every
	    position where it fits entirely inside the input. Each output element
	    is the sum of the elementwise product of the covered patch with the
	    kernel, and ``bias`` is added to the whole result at the end.

	    Args:
	        input_matrix: Array whose first two axes are the spatial dimensions.
	        kernel: Array whose first two axes give the window height and width.
	        bias: Value broadcast-added to every output element.

	    Returns:
	        Array with spatial shape ``input_matrix.shape[:2] -
	        kernel.shape[:2] + 1``. Sums are stored in a float32 buffer
	        regardless of input dtype.

	    Raises:
	        ValueError: If the kernel does not fit inside the input along
	            either of the first two axes.
	    """
	    kernel_shape = np.array(kernel.shape[:2])
	    out_shape = np.array(input_matrix.shape[:2]) - kernel_shape + 1
	    # A kernel exactly one element larger than the input gives a zero-sized
	    # output shape, which np.empty would accept silently. Reject every
	    # non-fitting kernel up front instead of returning an empty array.
	    if min(out_shape) < 1:
	        raise ValueError(
	            f"kernel of shape {kernel.shape[:2]} does not fit inside "
	            f"input of shape {input_matrix.shape[:2]}"
	        )
	    out = np.empty(out_shape, dtype=np.float32)
	    # range() stops before its end value, so iterating over the output
	    # shape visits exactly the top-left corners at which the kernel fits.
	    for row in range(out_shape[0]):
	        bottom = row + kernel_shape[0]
	        for col in range(out_shape[1]):
	            right = col + kernel_shape[1]
	            patch = input_matrix[row:bottom, col:right]
	            out[row, col] = np.sum(patch * kernel)
	    return out + bias

	def conv2d_im2col(input_matrix, kernel, bias):
	    """Apply ``kernel`` to ``input_matrix`` through one matrix-vector product.

	    Every kernel-sized patch of the input is flattened into one row of a
	    matrix (the "im2col" layout). Multiplying that matrix by the flattened
	    kernel yields all output values at once; ``bias`` is then added and the
	    flat result is reshaped to two dimensions.

	    Args:
	        input_matrix: Array whose first two axes are the spatial dimensions.
	        kernel: Array whose first two axes give the window height and width.
	        bias: Value broadcast-added to every output element.

	    Returns:
	        Array of shape ``input_matrix.shape[:2] - kernel.shape[:2] + 1``.
	    """
	    kernel_shape = np.array(kernel.shape[:2])
	    out_shape = np.array(input_matrix.shape[:2]) - kernel_shape + 1
	    rows = []
	    for row in range(out_shape[0]):
	        for col in range(out_shape[1]):
	            patch = input_matrix[row:row + kernel_shape[0], col:col + kernel_shape[1]]
	            rows.append(patch.flatten())
	    # The patches are stacked as rows and deliberately NOT transposed: a
	    # transposed layout only lines up by coincidence for particular shapes.
	    rows = np.array(rows)
	    conv = (rows @ kernel.flatten()) + bias
	    # Restore the 2-D layout of the output.
	    return conv.reshape(out_shape)

	def conv2d_strided(input_matrix, kernel, bias):
	    """Apply ``kernel`` to ``input_matrix`` using ``view_as_windows``.

	    ``view_as_windows`` supplies every kernel-sized window of the input
	    without a Python loop. The windows are reshaped so that each one
	    becomes a row, multiplied by the flattened kernel, offset by ``bias``,
	    and reshaped to two dimensions.

	    Args:
	        input_matrix: Array whose first two axes are the spatial dimensions.
	        kernel: Array whose first two axes give the window height and width.
	        bias: Value broadcast-added to every output element.

	    Returns:
	        Array of shape ``input_matrix.shape[:2] - kernel.shape[:2] + 1``.
	    """
	    kernel_shape = np.array(kernel.shape[:2])
	    out_shape = np.array(input_matrix.shape[:2]) - kernel_shape + 1
	    kernel_shape_flat = np.prod(kernel_shape)
	    # Collapse each window into a single row of length height * width.
	    strides = view_as_windows(input_matrix, kernel_shape).reshape(-1, kernel_shape_flat)
	    conv = (strides @ kernel.flatten()) + bias
	    return conv.reshape(out_shape)

	def conv2d_pytorch(input_matrix, kernel, bias):
	    """Apply ``kernel`` to ``input_matrix`` with ``torch.nn.functional.conv2d``.

	    Args:
	        input_matrix: 2-D array-like input.
	        kernel: 2-D array-like kernel.
	        bias: Array-like bias passed to ``conv2d``.

	    Returns:
	        The tensor produced by ``conv2d``. The two leading singleton axes
	        added here are NOT removed; callers index them away themselves.
	    """
	    # Two unsqueeze(0) calls prepend two singleton axes, turning each 2-D
	    # array into the 4-D (batch, channel, height, width) layout of conv2d.
	    batched_input = to_t(input_matrix).unsqueeze(0).unsqueeze(0)
	    batched_kernel = to_t(kernel).unsqueeze(0).unsqueeze(0)
	    return conv2d(batched_input, batched_kernel, to_t(bias))

	# Show the three NumPy implementations one after another so their outputs
	# can be compared by eye.
	print(
	    conv2d_im2col(input_matrix_, kernel_, bias_),
	    conv2d_naive(input_matrix_, kernel_, bias_),
	    conv2d_strided(input_matrix_, kernel_, bias_),
	    sep="\n",
	)
	# The PyTorch result still carries the two leading axes added by unsqueeze;
	# index them away and convert to NumPy so it prints like the outputs above.
	print(to_np(conv2d_pytorch(input_matrix_, kernel_, bias_))[0, 0].round(2))