Skip to content

Instantly share code, notes, and snippets.

@kamath
Last active February 22, 2021 04:09
Show Gist options
  • Save kamath/0c6c6d21c1cba45c954d07e987425bb4 to your computer and use it in GitHub Desktop.
Save kamath/0c6c6d21c1cba45c954d07e987425bb4 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits import mplot3d
import seaborn as sns
sns.set()
'''
The @ operator is used below for matrix multiplication, since spelling every product out with numpy function calls got verbose.
'''
def calculate_boundary(X, mu_list, sigma, pi_list):
    """Evaluate the two-class LDA decision function at the point ``X``.

    The returned value is::

        log(pi_k / pi_l)
            - 1/2 * (mu_k + mu_l)^T  sigma^-1 (mu_k - mu_l)
            + X^T  sigma^-1 (mu_k - mu_l)

    so the decision boundary is the set of points where it equals zero.

    Args:
        X: Point to evaluate; the callers in this file pass a column
            vector built with ``reshape(-1, 1)``.
        mu_list: Exactly two class means ``(mu_k, mu_l)``, shaped like ``X``.
        sigma: Covariance matrix shared by both classes.
        pi_list: Exactly two class priors ``(pi_k, pi_l)``.

    Returns:
        The decision-function value as a scalar.
    """
    mu_k, mu_l = mu_list
    pi_k, pi_l = pi_list
    # Solve sigma @ diff = (mu_k - mu_l) once and reuse it for both terms,
    # rather than forming the explicit inverse of sigma twice.
    diff = np.linalg.solve(sigma, mu_k - mu_l)
    # Printed on every call: this is the normal vector of the boundary line.
    print("DIFF", diff)
    value = np.log(pi_k / pi_l) - 1/2 * (mu_k + mu_l).T @ diff + X.T @ diff
    return value.flatten()[0]
def LDA_score(X, mu_k, SIGMA, pi_k):
    """Return the linear discriminant score of point ``X`` for one class.

    The score is::

        log(pi_k) - 1/2 * mu_k^T SIGMA^-1 mu_k + X^T SIGMA^-1 mu_k

    Args:
        X: Point to score; the caller in this file passes a column vector.
        mu_k: Mean of the class, shaped like ``X``.
        SIGMA: Covariance matrix shared by all classes.
        pi_k: Prior probability of the class.

    Returns:
        The score as a scalar; the class with the largest score wins.
    """
    # SIGMA^-1 @ mu_k appears in both terms: solve the system once instead
    # of inverting SIGMA twice.
    sigma_inv_mu = np.linalg.solve(SIGMA, mu_k)
    score = np.log(pi_k) - 1/2 * mu_k.T @ sigma_inv_mu + X.T @ sigma_inv_mu
    return score.flatten()[0]
def predict_LDA_class(X, mu_list, sigma, pi_list):
    """Return the index of the class with the highest LDA score for ``X``.

    Args:
        X: Point to classify; reshaped to a column vector before scoring.
        mu_list: Sequence of per-class mean arrays.
        sigma: Covariance matrix shared by all classes.
        pi_list: Sequence of per-class priors, in the same order as
            ``mu_list``.

    Returns:
        Position in ``mu_list`` of the best-scoring class (``np.argmax``).

    Raises:
        ValueError: If ``mu_list`` and ``pi_list`` differ in length.
    """
    if len(mu_list) != len(pi_list):
        raise ValueError("mu_list and pi_list must have the same length")
    x_col = X.reshape(-1, 1)
    # Each class must be scored with its OWN prior; the previous code used
    # pi_list[0] for every class, which skews predictions whenever the
    # priors are unequal.
    scores_list = [
        LDA_score(x_col, mu.reshape(-1, 1), sigma, pi)
        for mu, pi in zip(mu_list, pi_list)
    ]
    return np.argmax(scores_list)
# Training data, named <label><feature>: e.g. L1X2 is feature X2 of label 1.
L1X1 = np.array([3.81, .23, 3.05, 0.68, 2.67])
L1X2 = np.array([-.55, 3.37, 3.53, 1.84, 2.74])
L2X1 = np.array([-2.04, -.72, -2.46, -3.51, -2.05])
L2X2 = np.array([-1.25, -3.35, -1.31, 0.13, -2.82])
L1 = np.array(list(zip(L1X1, L1X2)))
L2 = np.array(list(zip(L2X1, L2X2)))
# Stack both classes into one table, appending the class label as column "y".
data = np.array([list(a) + [1] for a in L1] + [list(a) + [2] for a in L2])
df = pd.DataFrame(data, columns = ["X1", "X2", "y"])
# Per-class means as column vectors, ordered by class label (groupby sorts keys).
mu_list = df.groupby('y').mean().values
mu_list = [a.reshape(-1, 1) for a in mu_list]
print("MU VALUES")
for i, a in enumerate(mu_list):
    print(f"mu_{i + 1} = {a}")
# NOTE(review): this is the covariance of the whole data set, not the pooled
# within-class covariance that textbook LDA uses -- confirm this is intended.
sigma = df[["X1", "X2"]].cov().values
print("SIGMA")
print(sigma)
# Class priors. value_counts() orders by frequency, so sort by label to keep
# pi_list aligned with mu_list (which is ordered by label).
pi_list = df["y"].value_counts().sort_index().values / len(df)
# Grid covering the data range, used to evaluate the decision function.
N = 5
X = np.linspace(df["X1"].min(), df["X1"].max(), N)
Y = np.linspace(df["X2"].min(), df["X2"].max(), N)
X, Y = np.meshgrid(X, Y)
# `height` replaces the `size` keyword, which seaborn renamed in 0.9 and later removed.
g = sns.FacetGrid(df, hue="y", height=10).map(plt.scatter, "X1", "X2").add_legend()
ax = g.ax
boundary = np.array([
    calculate_boundary(np.array([xx, yy]).reshape(-1, 1), mu_list, sigma, pi_list)
    for xx, yy in zip(np.ravel(X), np.ravel(Y))
]).reshape(X.shape)
# The decision boundary is the zero level set of the decision function.
ax.contour(X, Y, boundary, levels = [0])
# Intercept of the boundary line: the decision function evaluated at the origin.
print(calculate_boundary(np.array([0,0]).reshape(-1,1), mu_list, sigma, pi_list))
# The axes show the two features, not the class labels.
ax.set_xlabel('X1')
ax.set_ylabel('X2')
plt.show()
'''
MU VALUES
mu_1 = [[2.088]
[2.186]]
mu_2 = [[-2.156]
[-1.72 ]]
SIGMA
[[6.52282667 3.51129111]
[3.51129111 6.32677889]]
x = [0, 0]^T -> boundary = -.069 and sigma^-1 * delta(mu) = [.4539, .3655]
Boundary Line: X1 * .4539 + X2 * .3655 - .069 = 0
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment