Skip to content

Instantly share code, notes, and snippets.

@zommiommy
Last active May 31, 2022 14:20
Show Gist options
  • Save zommiommy/24f712f22eee4af763bbddbc1fbe86c2 to your computer and use it in GitHub Desktop.
Manual Logistic Regression with SGD, Momentum, Adam, Nadam
"""Compare SGD, Momentum, Adam and Nadam on a 1-D logistic regression.

Each optimizer is timed over a full-batch gradient-descent run on the same
synthetic data and initial weights; the per-epoch binary cross entropy is
plotted for visual comparison.
"""
import time

import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import trange

from optimizers import SGD, Momentum, Adam, Nadam

# Synthetic 1-D data: two overlapping Gaussian clouds.
positives = np.random.normal(loc=-0.3, size=1000)
negatives = np.random.normal(loc=0.0, size=1000)

# Hyper-parameters.
compute_loss = True
n_of_samples = len(positives) + len(negatives)
n_of_epochs = 1_000
seed = 31337

for opt in [SGD, Momentum, Adam, Nadam]:
    start_time = time.time()
    # Re-seed so every optimizer starts from identical initial weights.
    np.random.seed(seed)
    m, b = np.random.normal(), np.random.normal()
    # BUG FIX: instantiate the optimizer under test — the original
    # hard-coded Adam here, so all four runs actually benchmarked Adam.
    optm = opt(learning_rate=0.001)
    optb = opt(learning_rate=0.001)
    losses = np.zeros(n_of_epochs)
    # Training loop (full-batch gradient descent).
    for epoch in trange(n_of_epochs, leave=False):
        gradient_m, gradient_b = 0.0, 0.0
        # BCE gradient, positive samples (target y=1): term is (p - 1).
        for x in positives:
            z = m * x + b
            p = 1.0 / (1.0 + np.exp(-z))
            gradient_b += (p - 1)
            gradient_m += (p - 1) * x
        # BCE gradient, negative samples (target y=0): term is p.
        for x in negatives:
            z = m * x + b
            p = 1.0 / (1.0 + np.exp(-z))
            gradient_b += p
            gradient_m += p * x
        # Average the gradients and let the optimizer pick the step size.
        m -= optm.gradient_update(gradient_m / n_of_samples)
        b -= optb.gradient_update(gradient_b / n_of_samples)
        if compute_loss:
            # BCE: -log(p) on positives, -log(1 - p) on negatives
            # (the subtraction below adds -log(1 - p)).
            for positive in positives:
                losses[epoch] += np.log(1 + np.exp(-m * positive - b))
            for negative in negatives:
                losses[epoch] -= np.log(1 - 1 / (1 + np.exp(-m * negative - b)))
    print(f"{opt.__name__:<10}:{time.time() - start_time:.3f}s\tm:{m}\tb:{b}\tloss:{losses[-1]}")
    if compute_loss:
        plt.plot(losses, label=opt.__name__)

plt.legend()
plt.ylabel("Binary Cross Entropy")
plt.xlabel("Epoch")
plt.show()
import numpy as np
class Optimizer:
    """Abstract base class for the single-parameter optimizers below."""

    def gradient_update(self, gradient: float) -> float:
        """Map a raw gradient to the step that will be subtracted from the parameter."""
        raise NotImplementedError("The child should implement this")
class SGD(Optimizer):
    """Plain stochastic gradient descent: step = learning_rate * gradient."""

    def __init__(self, learning_rate: float = 0.001):
        self.learning_rate = learning_rate

    def gradient_update(self, gradient: float) -> float:
        """Scale the gradient by the constant learning rate."""
        return self.learning_rate * gradient
class Momentum(Optimizer):
    """Classical momentum (Polyak, 1964).

    Keeps an exponentially decayed running step ("velocity") and adds the
    freshly scaled gradient to it on every call.
    """

    def __init__(self, decay_factor: float = 0.9, learning_rate: float = 0.001):
        self.decay_factor = decay_factor
        self.learning_rate = learning_rate
        self.momentum = 0  # running velocity, starts at rest

    def gradient_update(self, gradient: float) -> float:
        """Update the velocity with the new gradient and return it as the step."""
        velocity = self.decay_factor * self.momentum + self.learning_rate * gradient
        self.momentum = velocity
        return velocity
class Adam(Optimizer):
    """Adam (Kingma & Ba, 2015), written in the paper's "efficient" form:
    both bias corrections are folded into a per-step learning rate alpha_t,
    so the raw moment estimates are used directly in the returned step.
    """

    def __init__(self, beta1: float = 0.9, beta2: float = 0.999,
                 epsilon: float = 1e-8, learning_rate: float = 0.001):
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.learning_rate = learning_rate
        self.first_moment = 0   # EMA of gradients
        self.second_moment = 0  # EMA of squared gradients
        self.t = 0              # step counter, 1-based after the first update

    def gradient_update(self, gradient: float) -> float:
        """Return the bias-corrected adaptive step for this gradient."""
        self.t += 1
        b1, b2 = self.beta1, self.beta2
        self.first_moment = b1 * self.first_moment + (1 - b1) * gradient
        self.second_moment = b2 * self.second_moment + (1 - b2) * gradient**2
        # Folded bias correction: sqrt(1 - b2^t) / (1 - b1^t) scales lr.
        alpha_t = self.learning_rate * (1 - b2**self.t)**0.5 / (1 - b1**self.t)
        return alpha_t * self.first_moment / (self.epsilon + self.second_moment**0.5)
class Nadam(Optimizer):
    """Nadam (Dozat, 2016): Adam with Nesterov-style lookahead applied to
    the bias-corrected first moment."""

    def __init__(self, beta1: float = 0.9, beta2: float = 0.999,
                 epsilon: float = 1e-8, learning_rate: float = 0.001):
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.learning_rate = learning_rate
        self.first_moment = 0   # EMA of gradients
        self.second_moment = 0  # EMA of squared gradients
        self.t = 0              # step counter, 1-based after the first update

    def gradient_update(self, gradient: float) -> float:
        """Return the Nesterov-accelerated adaptive step for this gradient."""
        self.t += 1
        b1, b2 = self.beta1, self.beta2
        self.first_moment = b1 * self.first_moment + (1 - b1) * gradient
        self.second_moment = b2 * self.second_moment + (1 - b2) * gradient**2
        # Lookahead: the decayed moment is corrected one step ahead (t+1),
        # while the current gradient's contribution is corrected at t.
        estimated_first_moment = (
            b1 * self.first_moment / (1 - b1**(self.t + 1))
            + (1 - b1) * gradient / (1 - b1**self.t)
        )
        estimated_second_moment = b2 * self.second_moment / (1 - b2**self.t)
        return (self.learning_rate * estimated_first_moment
                / (self.epsilon + estimated_second_moment**0.5))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment