import numpy as np

def lin(params, x):
    # Simple linear model: y = a*x + b, with params = [a, b]
    return params[0] * x + params[1]

def linear_gradients(params_to_learn, x, y):
    # Gradients of the squared error (a*x + b - y)^2 w.r.t. a and b
    a = params_to_learn[0]
    b = params_to_learn[1]
    de_da = 2 * x * (a * x + b - y)
    de_db = 2 * (a * x + b - y)
    return [de_da, de_db]
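
# gradient_descent() below calls mse(), which is not defined in this gist.
# A minimal sketch of that helper, assuming it computes the mean squared error
# of the model f over the whole dataset:
def mse(X, Y, f, params):
    return sum((f(params, x) - y) ** 2 for x, y in zip(X, Y)) / len(X)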
def gradient_descent(X, Y, init_params, f, gradients_func, optim, optim_params,
                     epochs=1, anneal_epoch_freq=5):
    params_to_learn = init_params.copy()
    wavg_grads = [0] * len(params_to_learn)          # used if optim = Momentum or Adam
    wavg_squared_grads = [0] * len(params_to_learn)  # used if optim = Adam
    lr_basis = optim_params[0]                       # used if optim = AdamAnn
    eps = 1e-8                                       # avoids division by zero in the Adam update
    for e in range(epochs):
        if optim == "AdamAnn" and (e + 1) % anneal_epoch_freq == 0:
            lr_basis = lr_basis / 4.0  # annealing the learning rate
        for x_, y_ in zip(X, Y):
            loss = mse(X, Y, f, params_to_learn)  # full-dataset loss, kept for reporting
            gradients = gradients_func(params_to_learn, x_, y_)
            if optim == "SGD":
                alpha = optim_params[0]
                for i in range(len(params_to_learn)):
                    params_to_learn[i] = params_to_learn[i] - alpha * gradients[i]
            elif optim == "Momentum":
                alpha = optim_params[0]
                beta = optim_params[1]
                for i in range(len(params_to_learn)):
                    # exponentially weighted average of past gradients
                    wavg_grads[i] = wavg_grads[i] * beta + (1.0 - beta) * gradients[i]
                    params_to_learn[i] = params_to_learn[i] - alpha * wavg_grads[i]
            elif optim == "Adam":
                lr_basis = optim_params[0]
                beta1 = optim_params[1]
                beta2 = optim_params[2]
                for i in range(len(params_to_learn)):
                    # first and second moment estimates (no bias correction here)
                    wavg_grads[i] = wavg_grads[i] * beta1 + (1.0 - beta1) * gradients[i]
                    wavg_squared_grads[i] = wavg_squared_grads[i] * beta2 + (1.0 - beta2) * (gradients[i] * gradients[i])
                    # per-parameter learning rate scaled by the RMS of recent gradients
                    lr = lr_basis / (np.sqrt(wavg_squared_grads[i]) + eps)
                    params_to_learn[i] = params_to_learn[i] - lr * wavg_grads[i]
            elif optim == "AdamAnn":
                # same update as Adam, but lr_basis is annealed every anneal_epoch_freq epochs
                beta1 = optim_params[1]
                beta2 = optim_params[2]
                for i in range(len(params_to_learn)):
                    wavg_grads[i] = wavg_grads[i] * beta1 + (1.0 - beta1) * gradients[i]
                    wavg_squared_grads[i] = wavg_squared_grads[i] * beta2 + (1.0 - beta2) * (gradients[i] * gradients[i])
                    lr = lr_basis / (np.sqrt(wavg_squared_grads[i]) + eps)
                    params_to_learn[i] = params_to_learn[i] - lr * wavg_grads[i]
    print("Learned params with {}: {} (final loss: {})".format(optim, params_to_learn, loss))
    return params_to_learn
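
# x_gen and y_real are not defined in this gist; a minimal, assumed example of
# synthetic data the driver code below could run on (a noisy line y = 2x + 1).
np.random.seed(0)
x_gen = np.random.uniform(-1, 1, 100)
y_real = 2 * x_gen + 1 + np.random.normal(0, 0.1, 100)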
epochs = 20
init_params = [0, 0]
# optim_params: [alpha] for SGD, [alpha, beta] for Momentum, [lr_basis, beta1, beta2] for Adam/AdamAnn
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "SGD", [0.001], epochs=epochs)
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "Momentum", [0.001, 0.9], epochs=epochs)
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "Adam", [1, 0.7, 0.9], epochs=epochs)
gradient_descent(x_gen, y_real, init_params, lin, linear_gradients, "AdamAnn", [1, 0.7, 0.9], epochs=epochs)