# Anand's Gradient Descent (gist by @jaidevd, January 3, 2022)
import numpy as np
from sklearn.model_selection import train_test_split

def add_dummy_feature(x):
    # Prepend a column of ones so that w[0] can act as the intercept.
    return np.column_stack((np.ones(x.shape[0]), x))
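
# For instance (illustrative values), add_dummy_feature(np.array([5., 7.]))
# returns [[1., 5.], [1., 7.]].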

# Predicting a label follows the equation y = Xw, in its vectorized form.
# def predict(X, w):
#     if X.shape[1] == w.shape[0]:
#         return X @ w
#     else:
#         return None

def loss(y_true, y_pred):
    # Half the sum of squared errors: 0.5 * ||y_true - y_pred||^2.
    e = y_true - y_pred
    return 0.5 * (e.T @ e)
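
# Quick sanity check of the loss (illustrative values): here e = [0, -1],
# so the loss should be 0.5 * (0 ** 2 + (-1) ** 2) = 0.5.
assert loss(np.array([1., 2.]), np.array([1., 3.])) == 0.5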

def calculate_gradient(X, y, w):
    # Gradient of the loss above with respect to w: X^T (Xw - y).
    return X.T @ (X @ w - y)

def weight_update(w, lr, g):
    # One gradient-descent step: w <- w - lr * g.
    return w - lr * g
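
# Optional sketch, not part of the original gist: verify the analytic
# gradient against a central finite difference. _check_gradient is a name
# introduced here purely for illustration.
def _check_gradient(X, y, w, eps=1e-6):
    g = calculate_gradient(X, y, w)
    g_num = np.zeros_like(w, dtype=float)
    for j in range(w.shape[0]):
        d = np.zeros_like(w, dtype=float)
        d[j] = eps
        # Central difference of the loss along coordinate j.
        g_num[j] = (loss(y, X @ (w + d)) - loss(y, X @ (w - d))) / (2 * eps)
    return np.allclose(g, g_num, rtol=1e-4)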

def _weights_invalid(weights, i):
    # Flag a learning rate that is too large: if the sign of either weight
    # oscillates over the last three iterations, the iterates are diverging.
    if i < 2:
        return False
    # Invalid: [-1, 1, -1], [1, -1, 1]
    # Valid: [1, 1, 1], [-1, -1, -1]
    w1, w2 = weights[i - 2:i + 1, 0] > 0, weights[i - 2:i + 1, 1] > 0
    w1_s = np.sort(w1)
    w2_s = np.sort(w2)
    if not np.array_equal(w1, w1_s) and not np.array_equal(w1, w1_s[::-1]):
        return True
    if not np.array_equal(w2, w2_s) and not np.array_equal(w2, w2_s[::-1]):
        return True
    return False
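
# Illustrative check of the oscillation detector (_osc and _mono are made-up
# weight histories): a sign-flipping column is flagged, a monotone one is not.
_osc = np.array([[-1., 1.], [1., -1.], [-1., 1.]])
_mono = np.array([[1., 1.], [2., 2.], [3., 3.]])
assert _weights_invalid(_osc, 2)
assert not _weights_invalid(_mono, 2)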

def gradient_descent(epochs):
    # Start with an arbitrary weight vector, say [0, 0].
    # After "many" iterations, we should reach the original weight vector [3, 4].
    weights = np.zeros((epochs, 2))
    losses = []
    w = weights[0]
    for i in range(epochs):
        y_pred = X_train @ w
        losses.append(loss(y_train, y_pred))
        g = calculate_gradient(X_train, y_train, w)
        w = weight_update(w, lr, g)
        weights[i, :] = w
        if _weights_invalid(weights, i):
            raise ValueError('Please check LR, bad values.')
    print(w)
    return w, losses
# main program
w0 = 3 # weight0
w1 = 4 # weight1
# lr = 0.001
lr = 1e-4  # learning rate
epochs = 10000
X = 10 * np.random.random(100)  # Generate 100 examples in [0, 10).
y = w0 + w1 * X  # Generate the outputs from the pre-determined weight vector [3, 4].
# random_state=42 seeds the split, so the same split occurs every time this is run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train = add_dummy_feature(X_train)  # Add the dummy (intercept) feature to X_train.
w = np.array([w0, w1])  # The true weight vector [3, 4], kept for reference.
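
# Optional sanity check, not in the original gist: y is noise-free, so an
# exact least-squares solve on the training data should recover [3, 4].
# w_exact is a name introduced here for illustration.
w_exact = np.linalg.lstsq(X_train, y_train, rcond=None)[0]
print('closed-form solution:', w_exact)  # expect approximately [3. 4.]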
w_final, losses = gradient_descent(epochs)
# I expect the weights to converge to [3, 4], but they keep growing without bound. Why?
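
# One common cause of this kind of blow-up: for batch gradient descent on
# 0.5 * ||Xw - y||^2, the iterates diverge once lr exceeds
# 2 / lambda_max(X^T X). A minimal diagnostic sketch, assuming X_train
# already includes the dummy feature column (lam_max is a name introduced
# here):
lam_max = np.linalg.eigvalsh(X_train.T @ X_train).max()
print('largest stable learning rate:', 2 / lam_max)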