@Mehdi-Amine
Last active June 4, 2020 21:16
Softmax differentiation
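The snippet computes the Jacobian of the softmax in two ways: explicitly, case by case, and with PyTorch's autograd, then compares the results. With s = softmax(z), the formula implemented below is ∂s_i/∂z_j = s_i (δ_ij − s_j), i.e. s_i (1 − s_i) when i = j and −s_i s_j when i ≠ j.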
import torch
import torch.nn.functional as F
#----------- Implementing the math -----------#
def softmax(z):
    return z.exp() / z.exp().sum(axis=1, keepdim=True)

def softmax_prime(z):
    sm = softmax(z).squeeze()
    sm_size = sm.shape[0]
    sm_ps = []
    for i, sm_i in enumerate(sm):
        for j, sm_j in enumerate(sm):
            # First case: i and j are equal:
            if i == j:
                # Differentiating the softmax of a neuron w.r.t. itself
                sm_p = sm_i * (1 - sm_i)
                sm_ps.append(sm_p)
            # Second case: i and j are not equal:
            else:
                # Differentiating the softmax of a neuron w.r.t. another neuron
                sm_p = -sm_i * sm_j
                sm_ps.append(sm_p)
    sm_ps = torch.tensor(sm_ps).view(sm_size, sm_size)
    return sm_ps
z = torch.tensor([[4., 2.]], requires_grad=True)
sm_p = softmax_prime(z)
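# for z = [[4., 2.]], sm_p is the 2x2 Jacobian of softmax(z) shown in the output below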
#----------- Using Pytorch autograd -----------#
torch_sm = F.softmax(z, dim=1)
# to extract the first row in the Jacobian matrix, use [[1., 0.]]
# retain_graph=True because we re-use backward() for the second row
torch_sm.backward(torch.tensor([[1.,0.]]), retain_graph=True)
r1 = z.grad
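# reassigning z.grad to a fresh tensor (rather than zeroing it in place)
# leaves r1 pointing at the first row computed above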
z.grad = torch.zeros_like(z)
# to extract the second row in the jacobian matrix, use [[0., 1.]]
torch_sm.backward(torch.tensor([[0.,1.]]))
r2 = z.grad
torch_sm_p = torch.cat((r1,r2))
#----------- Comparing outputs -----------#
print(f"Pytorch Softmax': \n{torch_sm_p} \nOur Softmax': \n{sm_p}")
'''
Out:
Pytorch Softmax':
tensor([[ 0.1050, -0.1050],
        [-0.1050,  0.1050]])
Our Softmax':
tensor([[ 0.1050, -0.1050],
        [-0.1050,  0.1050]])
'''
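As an extra sanity check (a sketch, not part of the original gist, assuming PyTorch >= 1.5 where torch.autograd.functional.jacobian is available), the full Jacobian can be requested in a single call and compared with the softmax_prime defined above:

# Cross-check using torch.autograd.functional.jacobian (available from PyTorch 1.5).
# Reuses softmax_prime(), torch, and F imported at the top of this file.
from torch.autograd.functional import jacobian

z_check = torch.tensor([[4., 2.]])
# jacobian() returns output_shape + input_shape, here (1, 2, 1, 2); squeeze to (2, 2)
jac = jacobian(lambda x: F.softmax(x, dim=1), z_check).squeeze()
print(torch.allclose(jac, softmax_prime(z_check)))  # expected: True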