Gists from muhammadgaffar (mgaffar). All snippets use NumPy as np.
import numpy as np

def softmax(x):
    # normalize exponentiated scores along axis 0 so each column sums to 1
    return np.exp(x) / np.sum(np.exp(x), axis=0)
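As written, softmax can overflow for large scores because np.exp is applied to the raw inputs. A minimal numerically stable variant (a sketch, not part of the gist) subtracts the per-column maximum first, which leaves the result unchanged:

def softmax_stable(x):
    # shifting by the column max does not change the ratio of exponentials,
    # but it keeps np.exp from overflowing for large scores
    z = x - np.max(x, axis=0, keepdims=True)
    return np.exp(z) / np.sum(np.exp(z), axis=0)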
def batchnorm_backward(dout, cache):
    # unfold the variables stored in cache by the forward pass
    xhat, gamma, xmu, ivar, sqrtvar, var, eps = cache
    # get the dimensions of the input/output
    N, D = dout.shape
    # step 9: gradients of beta and of the scaled input gamma * xhat
    dbeta = np.sum(dout, axis=0)
    dgammax = dout  # not necessary, but more understandable
    # step 8: gradients of gamma and of the normalized input xhat
    dgamma = np.sum(dgammax * xhat, axis=0)
    dxhat = dgammax * gamma
    # steps 7-5: backprop through xhat = xmu * ivar and ivar = 1 / sqrt(var + eps)
    divar = np.sum(dxhat * xmu, axis=0)
    dxmu1 = dxhat * ivar
    dvar = -0.5 * divar / (sqrtvar * (var + eps))
    # steps 4-1: backprop through var = mean(xmu**2) and xmu = x - mean(x)
    dxmu2 = 2. * xmu * dvar / N
    dmu = -np.sum(dxmu1 + dxmu2, axis=0)
    dx = dxmu1 + dxmu2 + dmu / N
    return dx, dgamma, dbeta
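For context, the cache unpacked above is what a matching forward pass would store. A minimal sketch of such a batchnorm_forward (not part of the gist; the name and argument order are assumptions chosen to match the cache tuple):

def batchnorm_forward(x, gamma, beta, eps=1e-5):
    # per-feature mean and centered input
    mu = np.mean(x, axis=0)
    xmu = x - mu
    # per-feature variance and inverse standard deviation
    var = np.var(x, axis=0)
    sqrtvar = np.sqrt(var + eps)
    ivar = 1. / sqrtvar
    # normalize, then scale and shift
    xhat = xmu * ivar
    out = gamma * xhat + beta
    cache = (xhat, gamma, xmu, ivar, sqrtvar, var, eps)
    return out, cache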
def initialize_adam(parameters):
    L = len(parameters) // 2  # number of layers in the neural network
    v = {}  # exponentially weighted average of the gradients
    s = {}  # exponentially weighted average of the squared gradients
    # Initialize v, s. Input: "parameters". Outputs: "v, s".
    for l in range(L):
        v["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
        v["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
        s["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
        s["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
    return v, s
def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
    L = len(parameters) // 2  # number of layers in the neural network
    # Momentum update for each parameter
    for l in range(L):
        # compute velocities as exponentially weighted averages of the gradients
        v["dW" + str(l+1)] = beta * v["dW" + str(l+1)] + (1 - beta) * grads["dW" + str(l+1)]
        v["db" + str(l+1)] = beta * v["db" + str(l+1)] + (1 - beta) * grads["db" + str(l+1)]
        # update parameters with the velocities
        parameters["W" + str(l+1)] -= learning_rate * v["dW" + str(l+1)]
        parameters["b" + str(l+1)] -= learning_rate * v["db" + str(l+1)]
    return parameters, v
def initialize_velocity(parameters):
    L = len(parameters) // 2  # number of layers in the neural network
    v = {}
    # Initialize each velocity with zeros of the same shape as its parameter
    for l in range(L):
        v["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
        v["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
    return v
def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    np.random.seed(seed)
    m = X.shape[1]  # number of training examples (one column per example)
    mini_batches = []
    # Step 1: Shuffle (X, Y) with the same permutation of columns
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((1, m))
    # Step 2: Partition (shuffled_X, shuffled_Y); the last mini-batch may be smaller
    for k in range(0, m, mini_batch_size):
        mini_batch_X = shuffled_X[:, k:k + mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k:k + mini_batch_size]
        mini_batches.append((mini_batch_X, mini_batch_Y))
    return mini_batches
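A quick check of the batching behaviour on an assumed toy shape of 2 features by 148 examples:

X = np.random.randn(2, 148)
Y = (np.random.randn(1, 148) > 0).astype(int)
batches = random_mini_batches(X, Y, mini_batch_size=64, seed=0)
# 148 examples with batch size 64 -> mini-batches of 64, 64 and 20 columns
print([mb_X.shape[1] for mb_X, mb_Y in batches])  # [64, 64, 20]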
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    # Compute gradapprox
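    # Not in the gist preview: a sketch of the central-difference loop the comment
    # above introduces. It assumes a cost function forward_propagation_n(X, Y, parameters)
    # and a vector_to_dictionary helper (sketched further below); both names are
    # assumptions, not taken from these gists.
    for i in range(num_parameters):
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] += epsilon
        J_plus[i] = forward_propagation_n(X, Y, vector_to_dictionary(theta_plus, parameters))
        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] -= epsilon
        J_minus[i] = forward_propagation_n(X, Y, vector_to_dictionary(theta_minus, parameters))
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
    # relative difference between the backprop gradient and the numerical estimate
    difference = np.linalg.norm(grad - gradapprox) / (np.linalg.norm(grad) + np.linalg.norm(gradapprox))
    return difference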
def gradients_to_vector(gradients):
    """
    Roll all our gradients dictionary into a single vector satisfying our specific required shape.
    """
    count = 0
    params = []
    # assumes the gradients dictionary holds exactly one dW and one db entry per layer
    num_params = len(gradients) // 2
    for l in range(1, num_params + 1):
        params = params + ["dW" + str(l), "db" + str(l)]
    for key in params:
        # flatten each gradient into a column vector and stack them in order
        new_vector = np.reshape(gradients[key], (-1, 1))
        theta = new_vector if count == 0 else np.concatenate((theta, new_vector), axis=0)
        count += 1
    return theta
def dictionary_to_vector(parameters):
    """
    Roll all our parameters dictionary into a single vector satisfying our specific required shape.
    """
    keys = []
    count = 0
    params = []
    num_params = len(parameters) // 2
    for l in range(1, num_params + 1):
        params = params + ["W" + str(l), "b" + str(l)]
    for key in params:
        # flatten each parameter into a column vector and remember which key each entry came from
        new_vector = np.reshape(parameters[key], (-1, 1))
        keys = keys + [key] * new_vector.shape[0]
        theta = new_vector if count == 0 else np.concatenate((theta, new_vector), axis=0)
        count += 1
    return theta, keys
def initialize_parameters_he(layer_dims):
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        # He initialization: Gaussian weights scaled by sqrt(2 / fan_in), zero biases
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * np.sqrt(2. / layer_dims[l-1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
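A quick shape check with an assumed architecture of 3 inputs, one hidden layer of 4 units, and 1 output:

parameters = initialize_parameters_he([3, 4, 1])
print(parameters["W1"].shape, parameters["b1"].shape)  # (4, 3) (4, 1)
print(parameters["W2"].shape, parameters["b2"].shape)  # (1, 4) (1, 1)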