Anand's Gradient Descent
import numpy as np
from sklearn.model_selection import train_test_split
def add_dummy_feature(x):
    # Prepend a column of ones so the intercept can be learned as w[0].
    return np.column_stack((np.ones(x.shape[0]), x))
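# A quick illustration (the sample values are mine, not from the original):
#   add_dummy_feature(np.array([2.0, 5.0]))
#   -> array([[1., 2.],
#             [1., 5.]])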
# Prediction follows the equation y = Xw in its vectorized form.
# def predict(X, w):
#     if X.shape[1] == w.shape[0]:
#         return X @ w
#     else:
#         return None
def loss(y_true, y_pred):
    # Half the sum of squared errors: L = 0.5 * ||y_true - y_pred||^2
    e = y_true - y_pred
    return 0.5 * (e.T @ e)
def calculate_gradient(X, y, w):
    # Gradient of the loss above with respect to w: dL/dw = X.T @ (X @ w - y)
    return X.T @ (X @ w - y)
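# Sketch of a finite-difference sanity check for the gradient above (the
# sample values are illustrative, not from the original gist):
#   Xc = add_dummy_feature(np.array([1.0, 2.0]))
#   yc = np.array([7.0, 11.0])
#   wc, eps = np.array([0.5, 0.5]), 1e-6
#   num = (loss(yc, Xc @ (wc + [eps, 0])) - loss(yc, Xc @ (wc - [eps, 0]))) / (2 * eps)
#   # num should agree with calculate_gradient(Xc, yc, wc)[0] to roughly 1e-5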
def weight_update(w, lr, g):
    # Standard gradient step: w <- w - lr * g
    return w - lr * g
def _weights_invalid(weights, i):
    # Flag divergence: the run is suspect if the sign of either weight has
    # flipped back and forth over the last three iterations.
    if i < 2:
        return False
    # Invalid (oscillating signs): [-1, 1, -1], [1, -1, 1]
    # Valid (monotone signs): [1, 1, 1], [-1, -1, -1]
    recent = weights[i - 2 : i + 1]
    w1, w2 = recent[:, 0] > 0, recent[:, 1] > 0
    w1_s = np.sort(w1)
    w2_s = np.sort(w2)
    if not np.array_equal(w1, w1_s) and not np.array_equal(w1, w1_s[::-1]):
        return True
    if not np.array_equal(w2, w2_s) and not np.array_equal(w2, w2_s[::-1]):
        return True
    return False
def gradient_descent(X, y, lr, epochs):
    # Start from an arbitrary weight vector, say [0, 0]. After "many"
    # iterations we should recover the original weight vector [3, 4].
    weights = np.zeros((epochs, 2))
    losses = []
    w = np.zeros(2)
    for i in range(epochs):
        y_pred = X @ w
        losses.append(loss(y, y_pred))
        g = calculate_gradient(X, y, w)
        w = weight_update(w, lr, g)
        weights[i, :] = w
        if _weights_invalid(weights, i):
            raise ValueError('Please check lr, bad values.')
        if i % 1000 == 0:
            print(i, w)
    return w, losses
# main program
w0 = 3  # weight 0 (intercept)
w1 = 4  # weight 1 (slope)
# lr = 0.001  # learning rate
lr = 1e-4  # learning rate
epochs = 10000
X = 10 * np.random.random(100)  # Generate 100 examples in [0, 10).
y = w0 + w1 * X  # Generate outputs from the predetermined weight vector [3, 4].
# random_state=42 seeds the split, so the same split occurs every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train = add_dummy_feature(X_train)  # Add the dummy (intercept) feature to X_train.
w_true = np.array([w0, w1])  # The weight vector [3, 4] we hope to recover (unused below).
w_final, losses = gradient_descent(X_train, y_train, lr, epochs)
# I expect the weights to converge to [3, 4], but they keep increasing forever. Why?
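# One likely answer to the question above (my note, not part of the original
# gist): batch gradient descent on this quadratic loss diverges whenever
# lr > 2 / lambda_max(X.T @ X). With 80 training rows and features drawn from
# [0, 10), lambda_max is on the order of a few thousand, so lr = 0.001 (the
# commented-out value) overshoots on every step, while lr = 1e-4 sits inside
# the stable range. The sketch below computes the bound for this X_train:
lambda_max = np.linalg.eigvalsh(X_train.T @ X_train).max()
print(f"largest stable learning rate ~ {2 / lambda_max:.2e}")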