@Mehdi-Amine · Created June 5, 2020 15:35
Implementing backpropagation by hand for a softmax + cross-entropy layer and checking the gradients against PyTorch autograd.
import torch  # x, y, w, b, z, sm, and t_ce are assumed to be defined earlier in the series

#----------- Using our differentiations -----------#
ce_p = crossentropy_prime(sm, y)  # = [[ 0.0000,  0.0000, -3.3230]]
sm_p = softmax_prime(z)           # = [[ 0.1919, -0.1140, -0.0779],
                                  #    [-0.1140,  0.2464, -0.1324],
                                  #    [-0.0779, -0.1324,  0.2104]]
z_p_w = torch.stack([x] * 3).squeeze()  # Recall: z' w.r.t. the weights is equal to x
z_p_b = torch.ones_like(b)              # Recall: z' w.r.t. the biases is equal to 1
# Backwards from cross-entropy to softmax
ce_sm = ce_p @ sm_p.T
# Backwards from softmax to z
our_w_grad = ce_sm.T * z_p_w
our_b_grad = ce_sm * z_p_b
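# Chain-rule recap: ce_sm is dL/dz (the 1x3 gradient of the loss w.r.t. z), so
# dL/dw = ce_sm.T * z_p_w and dL/db = ce_sm * z_p_b, exactly as computed above.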
#----------- Using PyTorch Autograd -----------#
t_ce.backward()    # t_ce: the same cross-entropy computed with PyTorch ops, so autograd can backpropagate
t_w_grad = w.grad  # gradient of the loss w.r.t. the weights
t_b_grad = b.grad  # gradient of the loss w.r.t. the biases
#----------- Comparing Outputs -----------#
print(f"Pytorch w_grad: \n{t_w_grad} \nPytorch b_grad: \n{t_b_grad}")
print(f"Math w_grad: \n{our_w_grad} \nMath b_grad: \n{our_b_grad}")
'''
Out:
PyTorch w_grad:
tensor([[ 0.2331,  0.1295,  0.0777],
        [ 0.3960,  0.2200,  0.1320],
        [-0.6292, -0.3495, -0.2097]])
PyTorch b_grad:
tensor([[ 0.2590,  0.4401, -0.6991]])
Math w_grad:
tensor([[ 0.2331,  0.1295,  0.0777],
        [ 0.3960,  0.2200,  0.1320],
        [-0.6292, -0.3495, -0.2097]])
Math b_grad:
tensor([[ 0.2590,  0.4401, -0.6991]])
'''
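
#----------- Appendix: assumed setup (illustrative sketch) -----------#
# The variables x, y, w, b, z, sm, t_ce and the helpers crossentropy_prime and
# softmax_prime come from earlier parts of this series and are not shown in this
# gist. The block below is one plausible reconstruction, consistent with the
# gradients printed above; the exact names, shapes, and weight values are
# assumptions, so it will not reproduce the printed numbers exactly.

x = torch.tensor([[0.9, 0.5, 0.3]])        # input (values consistent with the w_grad rows above)
y = torch.tensor([[0., 0., 1.]])           # one-hot target (third class, consistent with ce_p)
w = torch.randn(3, 3, requires_grad=True)  # weights (original values unknown)
b = torch.zeros(1, 3, requires_grad=True)  # biases

z = x @ w.T + b                            # linear layer (nn.Linear convention, assumed)
sm = torch.softmax(z, dim=1)               # softmax activations
t_ce = -(y * torch.log(sm)).sum()          # cross-entropy loss used by autograd

def softmax_prime(z):
    # Jacobian of the softmax: diag(s) - outer(s, s), a symmetric 3x3 matrix
    s = torch.softmax(z, dim=1)
    return torch.diag(s.squeeze()) - s.T @ s

def crossentropy_prime(sm, y):
    # Derivative of -(y * log(sm)).sum() w.r.t. sm: -y / sm
    return -y / sm

# With this setup, torch.allclose(our_w_grad, w.grad) and
# torch.allclose(our_b_grad, b.grad) should both hold after running the snippet above.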