Created
October 24, 2018 15:48
-
-
Save muhammadgaffar/3805883044c93823f47e642fe130fec0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def initialize_adam(parameters):
    """Create zero-filled Adam moment accumulators, one per trainable array.

    Arguments:
    parameters -- dict with keys "W1", "b1", ..., "WL", "bL" mapping to
                  2-D numpy arrays (weights and biases per layer).

    Returns:
    v -- dict of exponentially weighted averages of the gradient
         ("dW1", "db1", ...), zero-initialized to each parameter's shape
    s -- dict of exponentially weighted averages of the squared gradient,
         zero-initialized to each parameter's shape
    """
    num_layers = len(parameters) // 2  # each layer contributes a W and a b
    v = {}
    s = {}
    for layer in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            shape = parameters[prefix + str(layer)].shape
            v["d" + prefix + str(layer)] = np.zeros(shape)
            s["d" + prefix + str(layer)] = np.zeros(shape)
    return v, s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate = 0.01,
                beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8):
    """Perform one Adam update step on every layer's parameters.

    Arguments:
    parameters -- dict of arrays keyed "W1", "b1", ..., "WL", "bL" (updated in place)
    grads -- dict of gradients keyed "dW1", "db1", ... matching parameters' shapes
    v -- dict of first-moment moving averages (updated in place)
    s -- dict of second-moment moving averages (updated in place)
    t -- current step count (>= 1), used for bias correction
    learning_rate -- step size
    beta1 -- decay rate for the first-moment average
    beta2 -- decay rate for the squared-gradient average
    epsilon -- small constant preventing division by zero in the update

    Returns:
    parameters, v, s -- the updated dictionaries
    """
    num_layers = len(parameters) // 2  # each layer has a W and a b entry
    v_corrected = {}  # bias-corrected first-moment estimates
    s_corrected = {}  # bias-corrected second-moment estimates

    # Bias-correction denominators depend only on t, so compute them once.
    first_moment_scale = 1 - beta1 ** t
    second_moment_scale = 1 - beta2 ** t

    for layer in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            param_key = prefix + str(layer)
            grad_key = "d" + param_key
            gradient = grads[grad_key]

            # Moving average of the gradients, then bias-correct it.
            v[grad_key] = beta1 * v[grad_key] + (1 - beta1) * gradient
            v_corrected[grad_key] = v[grad_key] / first_moment_scale

            # Moving average of the squared gradients, then bias-correct it.
            s[grad_key] = beta2 * s[grad_key] + (1 - beta2) * gradient ** 2
            s_corrected[grad_key] = s[grad_key] / second_moment_scale

            # Adam step: scale the corrected gradient by the corrected RMS.
            parameters[param_key] = parameters[param_key] - learning_rate * v_corrected[grad_key] / (np.sqrt(s_corrected[grad_key]) + epsilon)
    return parameters, v, s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment