sgd_optimizers.py
#!/usr/bin/env python
"""
sgd_optimizers.py
Pseudocode for optimizers
These _should be_ identical to Pytorch implementation of the optimizers
"""
# --
# SGD with classical momentum (no Nesterov correction)

class SGDWithMomentum:
    def __init__(self, params, lr=0.1, momentum=0.9):
        # `params` is kept for signature parity with torch.optim but is unused here;
        # the pseudocode operates on a single (weight, grad) pair passed to `step`
        self.lr       = lr
        self.momentum = momentum
        self.velocity = 0
    
    def step(self, weight, grad):
        # v_t = momentum * v_{t-1} + g_t
        self.velocity = (self.velocity * self.momentum) + grad
        # w_t = w_{t-1} - lr * v_t
        weight = weight - self.lr * self.velocity
        return weight
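
# A minimal sanity check (not in the original gist): assuming `torch` is installed,
# step SGDWithMomentum and torch.optim.SGD side by side on arbitrary toy values and
# confirm the weights stay in sync. All names and values below are illustrative only.
import torch

w_torch   = torch.nn.Parameter(torch.tensor([1.0, -2.0, 3.0]))
sgd_torch = torch.optim.SGD([w_torch], lr=0.1, momentum=0.9)

w_pseudo   = w_torch.detach().clone()
sgd_pseudo = SGDWithMomentum(params=None, lr=0.1, momentum=0.9)

for i in range(1, 6):
    grad = i * torch.tensor([0.5, -1.0, 2.0])
    
    w_torch.grad = grad.clone()
    sgd_torch.step()
    
    w_pseudo = sgd_pseudo.step(w_pseudo, grad)

assert torch.allclose(w_torch, w_pseudo)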
# --
# SGD with Nesterov momentum

class SGDWithNesterovMomentum:
    def __init__(self, params, lr=0.1, momentum=0.9):
        # `params` is unused here, as above
        self.lr       = lr
        self.momentum = momentum
        self.velocity = 0
    
    def step(self, weight, grad):
        # v_t = momentum * v_{t-1} + g_t
        self.velocity = (self.velocity * self.momentum) + grad
        # Nesterov update: step along the gradient plus the "lookahead" momentum term
        weight = weight - self.lr * (grad + (self.momentum * self.velocity))
        return weight
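
# A minimal sanity check (not in the original gist): the same comparison as above,
# but against torch.optim.SGD with nesterov=True. Assumes `torch` is installed.
import torch

w_torch_nag   = torch.nn.Parameter(torch.tensor([1.0, -2.0, 3.0]))
sgd_torch_nag = torch.optim.SGD([w_torch_nag], lr=0.1, momentum=0.9, nesterov=True)

w_pseudo_nag   = w_torch_nag.detach().clone()
sgd_pseudo_nag = SGDWithNesterovMomentum(params=None, lr=0.1, momentum=0.9)

for i in range(1, 6):
    grad = i * torch.tensor([0.5, -1.0, 2.0])
    
    w_torch_nag.grad = grad.clone()
    sgd_torch_nag.step()
    
    w_pseudo_nag = sgd_pseudo_nag.step(w_pseudo_nag, grad)

assert torch.allclose(w_torch_nag, w_pseudo_nag)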
# --
# Adam optimizer

class SimpleAdam:
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
        # `params` is unused here, as above
        self.lr    = lr
        self.beta1 = betas[0]
        self.beta2 = betas[1]
        self.eps   = eps
        
        self.step_num   = 0
        self.exp_avg    = 0
        self.exp_avg_sq = 0
    
    def step(self, weight, grad):
        self.step_num += 1
        
        # Exponential moving averages of the gradient and the squared gradient
        self.exp_avg    = self.beta1 * self.exp_avg + (1 - self.beta1) * grad
        self.exp_avg_sq = self.beta2 * self.exp_avg_sq + (1 - self.beta2) * grad * grad
        
        # `** 0.5` instead of the undefined `sqrt`, so this runs on scalars and arrays alike
        denom = self.exp_avg_sq ** 0.5 + self.eps
        
        # Bias correction for the zero-initialized moving averages
        bias_correction1 = 1 - self.beta1 ** self.step_num
        bias_correction2 = 1 - self.beta2 ** self.step_num
        step_size = self.lr * (bias_correction2 ** 0.5) / bias_correction1
        
        weight = weight - step_size * (self.exp_avg / denom)
        return weight
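
# A minimal sanity check (not in the original gist): assuming `torch` is installed,
# compare SimpleAdam against torch.optim.Adam. Note that recent PyTorch versions add
# `eps` after dividing sqrt(exp_avg_sq) by sqrt(bias_correction2), so the two updates
# can differ on the order of eps; hence the explicit (loose) absolute tolerance.
import torch

w_torch_adam = torch.nn.Parameter(torch.tensor([1.0, -2.0, 3.0]))
adam_torch   = torch.optim.Adam([w_torch_adam], lr=1e-3, betas=(0.9, 0.999), eps=1e-8)

w_pseudo_adam = w_torch_adam.detach().clone()
adam_pseudo   = SimpleAdam(params=None, lr=1e-3, betas=(0.9, 0.999), eps=1e-8)

for i in range(1, 6):
    grad = i * torch.tensor([0.5, -1.0, 2.0])
    
    w_torch_adam.grad = grad.clone()
    adam_torch.step()
    
    w_pseudo_adam = adam_pseudo.step(w_pseudo_adam, grad)

assert torch.allclose(w_torch_adam, w_pseudo_adam, atol=1e-6)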