Anand's Gradient Descent
import numpy as np
from sklearn.model_selection import train_test_split
def add_dummy_feature(x):
    # Prepend a column of ones so the intercept can be learned as w[0].
    return np.column_stack((np.ones(x.shape[0]), x))
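# A quick illustration (the sample values are mine, not from the original):
#   add_dummy_feature(np.array([2.0, 5.0]))
#   -> array([[1., 2.],
#             [1., 5.]])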
# Prediction follows the equation y = Xw in its vectorized form.
# def predict(X, w):
#     if X.shape[1] == w.shape[0]:
#         return X @ w
#     else:
#         return None
def loss(y_true, y_pred):
    # Half the sum of squared errors: L = 0.5 * ||y_true - y_pred||^2
    e = y_true - y_pred
    return 0.5 * (e.T @ e)
def calculate_gradient(X, y, w):
    # Gradient of the loss above with respect to w: dL/dw = X.T @ (X @ w - y)
    return X.T @ (X @ w - y)
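# Sketch of a finite-difference sanity check for the gradient above (the
# sample values are illustrative, not from the original gist):
#   Xc = add_dummy_feature(np.array([1.0, 2.0]))
#   yc = np.array([7.0, 11.0])
#   wc, eps = np.array([0.5, 0.5]), 1e-6
#   num = (loss(yc, Xc @ (wc + [eps, 0])) - loss(yc, Xc @ (wc - [eps, 0]))) / (2 * eps)
#   # num should agree with calculate_gradient(Xc, yc, wc)[0] to roughly 1e-5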
def weight_update(w, lr, g):
    # Standard gradient step: w <- w - lr * g
    return w - lr * g
def _weights_invalid(weights, i):
    # Flag divergence: the run is suspect if the sign of either weight has
    # flipped back and forth over the last three iterations.
    if i < 2:
        return False
    # Invalid (oscillating signs): [-1, 1, -1], [1, -1, 1]
    # Valid (monotone signs): [1, 1, 1], [-1, -1, -1]
    recent = weights[i - 2 : i + 1]
    w1, w2 = recent[:, 0] > 0, recent[:, 1] > 0
    w1_s = np.sort(w1)
    w2_s = np.sort(w2)
    if not np.array_equal(w1, w1_s) and not np.array_equal(w1, w1_s[::-1]):
        return True
    if not np.array_equal(w2, w2_s) and not np.array_equal(w2, w2_s[::-1]):
        return True
    return False
def gradient_descent(X, y, lr, epochs):
    # Start from an arbitrary weight vector, say [0, 0]. After "many"
    # iterations we should recover the original weight vector [3, 4].
    weights = np.zeros((epochs, 2))
    losses = []
    w = np.zeros(2)
    for i in range(epochs):
        y_pred = X @ w
        losses.append(loss(y, y_pred))
        g = calculate_gradient(X, y, w)
        w = weight_update(w, lr, g)
        weights[i, :] = w
        if _weights_invalid(weights, i):
            raise ValueError('Please check lr, bad values.')
        if i % 1000 == 0:
            print(i, w)
    return w, losses
# main program
w0 = 3  # weight 0 (intercept)
w1 = 4  # weight 1 (slope)
# lr = 0.001  # learning rate
lr = 1e-4  # learning rate
epochs = 10000
X = 10 * np.random.random(100)  # Generate 100 examples in [0, 10).
y = w0 + w1 * X  # Generate outputs from the predetermined weight vector [3, 4].
# random_state=42 seeds the split, so the same split occurs every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train = add_dummy_feature(X_train)  # Add the dummy (intercept) feature to X_train.
w_true = np.array([w0, w1])  # The weight vector [3, 4] we hope to recover (unused below).
w_final, losses = gradient_descent(X_train, y_train, lr, epochs)
# I expect the weights to converge to [3, 4], but they keep increasing forever. Why?
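# One likely answer to the question above (my note, not part of the original
# gist): batch gradient descent on this quadratic loss diverges whenever
# lr > 2 / lambda_max(X.T @ X). With 80 training rows and features drawn from
# [0, 10), lambda_max is on the order of a few thousand, so lr = 0.001 (the
# commented-out value) overshoots on every step, while lr = 1e-4 sits inside
# the stable range. The sketch below computes the bound for this X_train:
lambda_max = np.linalg.eigvalsh(X_train.T @ X_train).max()
print(f"largest stable learning rate ~ {2 / lambda_max:.2e}")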