@akashjaswal
Vectorized Implementation of Linear Regression using Numpy
'''
Linear Regression - Vectorized Implementation w/ Numpy

Setup:
- Features X = feature matrix of shape (m, n) [a bias column of ones(m, 1) can be appended to the feature matrix]
- Target y = continuous variable of shape (m, 1)
- Weights W = weight matrix of shape (n, 1), initialized with zeros
- Standardize features to have zero mean and unit variance.

Gradient Descent Algorithm:
- Step 1: Predict => y_pred = np.dot(X, W)
- Step 2: Compute error => error = y - y_pred
- Step 3: Compute loss with the cost function J = (1/m) * sum(error ** 2)
- Step 4: The goal is to minimize the loss with every iteration
- Step 5: Compute the gradient => dJ/dW = (-2/m) * np.dot(X.T, error)
- Step 6: Update weights => W = W - learning_rate * gradient
- Repeat (a minimal one-iteration sketch of these steps follows this docstring)
'''
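As a quick sanity check on the six steps above, here is a minimal, self-contained one-iteration sketch on a toy dataset. The numbers and variable names are illustrative only and are not part of the class below:

import numpy as np

X = np.array([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])  # bias column + one feature, shape (3, 2)
y = np.array([[2.0], [4.0], [6.0]])                 # y = 2 * feature, so the ideal W is [0, 2]
W = np.zeros((2, 1))

y_pred = np.dot(X, W)                        # Step 1: predict (all zeros on the first pass)
error = y - y_pred                           # Step 2: error, shape (3, 1)
loss = (1.0 / len(X)) * np.sum(error ** 2)   # Step 3: mean squared error = 18.67 here
grad = (-2.0 / len(X)) * np.dot(X.T, error)  # Step 5: dJ/dW, shape (2, 1)
W = W - 0.01 * grad                          # Step 6: one update with learning rate 0.01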
import numpy as np


class LinearRegression:

    def __init__(self, learning_rate: float = 0.001, iterations: int = 500,
                 normalize: bool = True, fit_intercept: bool = True):
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.normalize = normalize
        self.fit_intercept = fit_intercept

    def normalize_features(self, X: np.ndarray) -> np.ndarray:
        '''
        Standardize features to zero mean and unit variance
        '''
        return (X - np.mean(X, 0)) / np.std(X, 0)  # Shape: (m, n)

    def prepare_features(self, X: np.ndarray) -> np.ndarray:
        if self.normalize:
            X = self.normalize_features(X)
        if self.fit_intercept:
            bias = np.ones(shape=(len(X), 1))
            X = np.append(bias, X, axis=1)
        return X  # Shape: (m, n + 1) if fit_intercept else (m, n)

    def init_weight_matrix(self, num_features: int) -> np.ndarray:
        '''
        Zeros matrix for weights with shape (num_features, 1)
        '''
        return np.zeros((num_features, 1))

    def predict(self, W: np.ndarray, features: np.ndarray) -> np.ndarray:
        return np.dot(features, W)  # Shape: (m, 1)

    def compute_error(self, y: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        return y - y_pred  # Shape: (m, 1)

    def compute_loss(self, error: np.ndarray, num_samples: int) -> float:
        return (1.0 / num_samples) * np.sum(error ** 2)  # Scalar: mean squared error

    def compute_gradients(self, features: np.ndarray, error: np.ndarray,
                          num_samples: int) -> np.ndarray:
        return (-2.0 / num_samples) * np.dot(features.T, error)  # Shape: (n, 1)

    def update_weights(self, W: np.ndarray, gradients: np.ndarray) -> np.ndarray:
        return W - self.learning_rate * gradients  # Shape: (n, 1)

    def fit(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        y = y.reshape((len(y), 1))
        features = self.prepare_features(X)
        num_samples, num_features = features.shape
        W = self.init_weight_matrix(num_features)
        for i in range(self.iterations):
            y_pred = self.predict(W, features)
            error = self.compute_error(y, y_pred)
            loss = self.compute_loss(error, num_samples)
            gradients = self.compute_gradients(features, error, num_samples)
            W = self.update_weights(W, gradients)
            print(f"Iteration: {i} Loss: {loss}")
        return W
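A minimal usage sketch (the synthetic data, seed, and hyperparameters here are assumptions for illustration). Since normalize=True standardizes the features, the learned weights live in the standardized space, so the closed-form least-squares comparison is run on the same prepared matrix:

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 3))             # 100 samples, 3 features
    y = X @ np.array([2.0, -1.0, 0.5]) + 3.0  # linear signal with intercept 3.0

    model = LinearRegression(learning_rate=0.1, iterations=500)
    W = model.fit(X, y)  # prints the loss at every iteration

    # Gradient descent should approach the least-squares solution computed
    # on the same prepared (standardized + bias-augmented) feature matrix.
    features = model.prepare_features(X)
    W_closed, *_ = np.linalg.lstsq(features, y.reshape(-1, 1), rcond=None)
    print("Gradient descent weights:", W.ravel())
    print("Closed-form weights:     ", W_closed.ravel())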