Vectorized Implementation of Logistic Regression using Numpy
'''
Logistic Regression - Vectorized Implementation w/ Numpy

Setup:
    - Features X = feature matrix of shape (m, n) [a bias term can be appended to the feature matrix with ones(m, 1)]
    - Target y = discrete variable of shape (m, 1) (consider binary for now)
    - Weights W = weight matrix of shape (n, 1) - initialized with zeros
    - Standardize features to have zero mean and unit variance.

Gradient Descent Algorithm (a worked one-step example follows this docstring):
    - Step 1: Calculate z => np.dot(features, W)
    - Step 2: Calculate y_hat (probability) => sigmoid => 1 / (1 + e^-z)
    - Step 3: Predict the class using the decision boundary => y_hat > threshold
    - Step 4: Compute log loss using the cost function J = (1/m) * sum(-y * log(y_hat) - (1 - y) * log(1 - y_hat))
    - Step 5: Compute the gradient => dJ/dW = np.dot(features.T, y_hat - y) / m
    - Step 6: Update weights => W = W - learning_rate * gradient
    - Repeat; the goal is to minimize the loss with every iteration.
'''
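# Worked one-step example of the algorithm above (hypothetical numbers, for intuition only):
#   features = [[1, 2], [1, -1]] (bias column appended), y = [[1], [0]], W = [[0], [0]]
#   z     = features @ W                  = [[0], [0]]
#   y_hat = sigmoid(z)                    = [[0.5], [0.5]]
#   grad  = features.T @ (y_hat - y) / 2  = [[0.0], [-0.75]]
#   W_new = W - 0.001 * grad              = [[0.0], [0.00075]]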
import numpy as np

epsilon = 1e-7  # keeps the log loss numerically stable: log(0) is undefined

class LogisticRegression:

    def __init__(self, learning_rate: float = 0.001, standardize: bool = True,
                 fit_intercept: bool = True, decision_threshold: float = 0.5,
                 iterations: int = 500):
        self.learning_rate = learning_rate
        self.standardize = standardize
        self.fit_intercept = fit_intercept
        self.decision_threshold = decision_threshold
        self.iterations = iterations

    def prepare_features(self, X):
        if self.standardize:
            X = (X - np.mean(X, 0)) / np.std(X, 0)
        if self.fit_intercept:
            bias = np.ones((X.shape[0], 1))
            X = np.append(bias, X, axis=1)
        return X

    def init_weight_matrix(self, num_features):
        return np.zeros((num_features, 1))  # shape: (n, 1), or (n + 1, 1) with a bias column

    def calculate_z(self, features, W):
        return np.dot(features, W)  # shape: (m, 1)

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))  # shape: (m, 1)

    def predict(self, y_prob):
        return (y_prob > self.decision_threshold).astype(np.float64)  # shape: (m, 1)

    def compute_loss(self, y, y_prob):
        return np.sum(-y * np.log(y_prob + epsilon) - (1 - y) * np.log(1 - y_prob + epsilon)) / len(y)  # scalar

    def compute_gradients(self, features, y, y_prob):
        return np.dot(features.T, (y_prob - y)) / len(y)  # shape: (n, 1)

    def update_weights(self, W, gradients):
        return W - self.learning_rate * gradients  # shape: (n, 1)

    def fit(self, X, y):
        y = y.reshape((len(y), 1))
        features = self.prepare_features(X)
        num_samples, num_features = features.shape
        W = self.init_weight_matrix(num_features)
        for i in range(self.iterations):
            z = self.calculate_z(features, W)
            y_prob = self.sigmoid(z)
            # Loss and gradients use the probabilities, not the thresholded
            # class predictions: log loss on hard 0/1 outputs is degenerate
            # and its gradient carries no useful signal.
            loss = self.compute_loss(y, y_prob)
            print(f"Iteration: {i} - Log Loss: {loss}")
            gradients = self.compute_gradients(features, y, y_prob)
            W = self.update_weights(W, gradients)
        return W
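A minimal usage sketch, not part of the original gist: it fits the model on a synthetic, roughly linearly separable dataset and thresholds the resulting probabilities. The data and names (X_demo, y_demo, rng) are illustrative assumptions.

# --- Usage sketch (assumed, illustrative data) ---
rng = np.random.default_rng(0)

# 100 samples, 2 features; label is 1 when the features sum to a positive value.
X_demo = rng.normal(size=(100, 2))
y_demo = (X_demo[:, 0] + X_demo[:, 1] > 0).astype(np.float64)

model = LogisticRegression(learning_rate=0.1, iterations=200)
W = model.fit(X_demo, y_demo)  # prints the log loss per iteration

# Reuse the learned weights for predictions on the training data.
features = model.prepare_features(X_demo)
y_prob = model.sigmoid(model.calculate_z(features, W))
y_pred = model.predict(y_prob)
print("Training accuracy:", (y_pred.flatten() == y_demo).mean())

One caveat on the design: prepare_features recomputes the standardization statistics on whatever data it receives, which is fine on the training set but means genuinely new data would be scaled with different statistics; a production version would store the training mean and standard deviation and reuse them at prediction time.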