Skip to content

Instantly share code, notes, and snippets.

@lazuxd
Created February 26, 2020 21:03
Show Gist options
  • Save lazuxd/dabf297214bb9992885e8ba1cd62b39f to your computer and use it in GitHub Desktop.
Save lazuxd/dabf297214bb9992885e8ba1cd62b39f to your computer and use it in GitHub Desktop.
Maximum Likelihood Classification
import numpy as np
class MLClassifier:
    """Gaussian maximum-likelihood classifier.

    Each class is modelled by a multivariate normal distribution whose mean
    and covariance are estimated from the training observations of that
    class. A sample is assigned to the class under which its likelihood is
    largest.

    Attributes set by fit():
        d              - number of variables (dimension)
        nclasses       - number of distinct labels seen in y
        mu_list        - mu_list[i] is the mean vector for label i
        sigma_inv_list - sigma_inv_list[i] is the inverse covariance matrix
                         for label i (only inverses are stored, since that is
                         what the likelihood needs)
        scalars        - scalars[i] is the normalisation constant in front of
                         the exponential for label i
    """

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """Estimate the per-class Gaussian parameters.

        x - numpy array of shape (n, d); n = #observations; d = #variables
        y - numpy array of shape (n,); labels are assumed to be the integers
            0 .. nclasses-1

        Prints a warning for every class whose covariance matrix is not
        positive definite. If such a matrix is exactly singular,
        np.linalg.inv raises numpy.linalg.LinAlgError.
        """
        x = np.asarray(x)
        y = np.asarray(y)

        # no. of variables / dimension
        self.d = x.shape[1]
        # no. of classes; assumes labels to be integers from 0 to nclasses-1
        self.nclasses = len(np.unique(y))
        self.mu_list = []
        self.sigma_inv_list = []
        self.scalars = []

        for i in range(self.nclasses):
            # subset of observations for label i
            cls_x = x[y == i]
            mu = np.mean(cls_x, axis=0)
            # rowvar=False: columns are the variables, rows the observations.
            # np.cov returns a 0-d array when there is a single variable, so
            # promote the result to a (d, d) matrix for the linalg routines.
            sigma = np.atleast_2d(np.cov(cls_x, rowvar=False))
            # A covariance matrix is symmetric, so eigvalsh applies and always
            # yields real eigenvalues.
            if np.any(np.linalg.eigvalsh(sigma) <= 0):
                # at least one eigenvalue is <= 0
                print(f'Warning! Covariance matrix for label {i} is not positive definite!\n')
            sigma_inv = np.linalg.inv(sigma)
            scalar = 1 / np.sqrt(((2 * np.pi) ** self.d) * np.linalg.det(sigma))

            self.mu_list.append(mu)
            self.sigma_inv_list.append(sigma_inv)
            self.scalars.append(scalar)

    def _class_likelihood(self, x: np.ndarray, cls: int) -> float:
        """Return the likelihood of x assuming its class label is cls.

        x   - numpy array of shape (d,)
        cls - class label
        """
        diff = x - self.mu_list[cls]
        # exponent of the Gaussian density: -1/2 * diff^T * Sigma^-1 * diff
        exponent = -0.5 * np.dot(np.matmul(diff, self.sigma_inv_list[cls]), diff)
        return self.scalars[cls] * np.exp(exponent)

    def predict(self, x: np.ndarray) -> int:
        """Return the label whose Gaussian gives x the highest likelihood.

        x - numpy array of shape (d,)
        """
        likelihoods = [self._class_likelihood(x, i) for i in range(self.nclasses)]
        return int(np.argmax(likelihoods))

    def score(self, x: np.ndarray, y: np.ndarray) -> float:
        """Return the accuracy (fraction of correct predictions) on (x, y).

        x - numpy array of shape (n, d); n = #observations; d = #variables
        y - numpy array of shape (n,)
        """
        n = x.shape[0]
        predicted_y = np.array([self.predict(row) for row in x])
        n_correct = np.sum(predicted_y == y)
        return float(n_correct / n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment