@yiyuezhuo
Last active August 2, 2019 07:18
Compute a Jacobian in PyTorch. I extended the original gist (https://gist.github.com/sbarratt/37356c46ad1350d4c30aefbd488a4faa) to support multi-dimensional inputs and outputs:
import torch

def compute_jacobian(f, x, output_dims):
    '''
    Normal mode:
        f: input_dims -> output_dims
    Jacobian mode:
        f: output_dims x input_dims -> output_dims x output_dims

    Returns the Jacobian of f at x, with shape output_dims + input_dims.
    '''
    # Repeat x once per output element, so a single backward pass
    # can fill in the whole Jacobian at once.
    repeat_dims = tuple(output_dims) + (1,) * len(x.shape)
    jac_x = x.detach().repeat(*repeat_dims)
    jac_x.requires_grad_()
    jac_y = f(jac_x)

    # Seed gradient: ones at the "diagonal" positions (i_1..i_k, i_1..i_k)
    # of the doubled output dimensions, zeros elsewhere.
    ml = torch.meshgrid([torch.arange(dim) for dim in output_dims])
    index = [m.flatten() for m in ml]
    gradient = torch.zeros(tuple(output_dims) + tuple(output_dims))
    gradient[tuple(index) * 2] = 1

    jac_y.backward(gradient)
    return jac_x.grad.data
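A quick sanity check first (a sketch I added; w1 and f1 are illustrative names, not from the original gist): in the 1-D case the Jacobian of f(x) = x @ w1 is simply w1^T, so the function should reproduce the original gist's behavior.

# Sanity check: f1 maps R^4 -> R^3, so the Jacobian has shape (3, 4)
w1 = torch.randn(4, 3)
f1 = lambda x: x @ w1
jac1 = compute_jacobian(f1, torch.randn(4), [3])
assert torch.allclose(jac1, w1.t())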
w = torch.randn(4, 3)
f = lambda x: x @ w                       # maps (..., 4) -> (..., 3)
x = torch.randn(2, 3, 4)
jac = compute_jacobian(f, x, [2, 3, 3])   # shape (2, 3, 3, 2, 3, 4)
'''
>>> w
tensor([[-0.2295,  1.4252,  2.2714],
        [ 0.5877, -2.4398,  0.0136],
        [ 0.3254, -0.3380,  0.1785],
        [ 0.5455,  0.9089, -0.3134]])
>>> jac[1,1,1]
tensor([[[ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 1.4252, -2.4398, -0.3380,  0.9089],
         [ 0.0000,  0.0000,  0.0000,  0.0000]]])
'''
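Here y[1,1,1] depends only on the slice x[1,1] (through the column w[:,1]), so jac[1,1,1] is zero everywhere except the x[1,1] row, which reproduces w[:,1]. As a cross-check (my addition, assuming a newer PyTorch, >= 1.5, which ships torch.autograd.functional.jacobian), the built-in should agree:

# Cross-check against the built-in; both have shape (2, 3, 3, 2, 3, 4)
from torch.autograd.functional import jacobian
assert torch.allclose(jac, jacobian(f, x))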