kamath/problem3.py

## problem3.py
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits import mplot3d
import seaborn as sns

# Initialise seaborn's plot styling once, up front, before any figure is created below.
sns.set()

'''
@ signs were used here to denote matrix multiplication since writing it all out in numpy got verbose
'''

def calculate_boundary(X,mu_list,sigma,pi_list):
    """Evaluate the two-class linear boundary function at one point.

    The value returned is

        log(pi_k / pi_l)
        - 1/2 * (mu_k + mu_l)^T sigma^-1 (mu_k - mu_l)
        + X^T sigma^-1 (mu_k - mu_l)

    so the decision boundary is the set of points where it equals zero.

    Args:
        X: point to evaluate; the callers in this file pass a column
            vector built with ``reshape(-1, 1)``.
        mu_list: exactly two class-mean vectors ``(mu_k, mu_l)``.
        sigma: covariance matrix; it is inverted on every call.
        pi_list: exactly two class priors ``(pi_k, pi_l)``.

    Returns:
        The first element of the flattened result, i.e. a scalar.

    Side effects:
        Prints ``sigma^-1 (mu_k - mu_l)`` prefixed with "DIFF" on each call.
    """
    mean_a, mean_b = mu_list
    prior_a, prior_b = pi_list

    # Invert once and reuse for both the direction vector and the offset term.
    sigma_inv = np.linalg.inv(sigma)
    diff = sigma_inv @ (mean_a - mean_b)
    print("DIFF", diff)

    log_prior_ratio = np.log(prior_a / prior_b)
    midpoint_term = 1/2 * (mean_a + mean_b).T @ sigma_inv @ (mean_a - mean_b)
    linear_term = X.T @ diff

    return (log_prior_ratio - midpoint_term + linear_term).flatten()[0]


def LDA_score(X,mu_k,SIGMA,pi_k):
    """Compute the linear discriminant score of point ``X`` for one class.

    The score is
    ``log(pi_k) - 1/2 * mu_k^T SIGMA^-1 mu_k + X^T SIGMA^-1 mu_k``.

    Args:
        X: point to score; the caller in this file passes a column vector.
        mu_k: mean of the class, passed by the caller as a column vector.
        SIGMA: covariance matrix; it is inverted on every call.
        pi_k: prior probability of the class.

    Returns:
        The first element of the flattened result, i.e. a scalar.
    """
    precision = np.linalg.inv(SIGMA)

    log_prior = np.log(pi_k)
    quadratic_term = 1/2 * (mu_k).T @ precision @ (mu_k)
    linear_term = X.T @ precision @ (mu_k)

    discriminant = log_prior - quadratic_term + linear_term
    return discriminant.flatten()[0]

def predict_LDA_class(X,mu_list,sigma,pi_list):
    """Return the index of the class whose LDA score for ``X`` is highest.

    Each class is scored with its own mean and its own prior. (The previous
    version passed ``pi_list[0]`` for every class, so unequal priors were
    silently ignored.)

    Args:
        X: feature vector of the point to classify; reshaped to a column.
        mu_list: one mean vector (numpy array) per class.
        sigma: covariance matrix shared by all classes.
        pi_list: one prior per class, in the same order as ``mu_list``.

    Returns:
        Zero-based position in ``mu_list`` of the best-scoring class, as
        returned by ``np.argmax``.

    Raises:
        ValueError: if ``mu_list`` and ``pi_list`` differ in length.
    """
    if len(mu_list) != len(pi_list):
        raise ValueError(
            f"mu_list has {len(mu_list)} entries but pi_list has {len(pi_list)}"
        )

    point = X.reshape(-1,1)
    scores_list = [
        LDA_score(point, mu.reshape(-1,1), sigma, prior)
        for mu, prior in zip(mu_list, pi_list)
    ]

    return np.argmax(scores_list)

# Training samples: L<label>X<feature> holds one feature's values for one class label.
L1X1 = np.array([3.81, .23, 3.05, 0.68, 2.67])
L1X2 = np.array([-.55, 3.37, 3.53, 1.84, 2.74])
L2X1 = np.array([-2.04, -.72, -2.46, -3.51, -2.05])
L2X2 = np.array([-1.25, -3.35, -1.31, 0.13, -2.82])

# Pair the two features up so that each row is one (X1, X2) sample.
L1 = np.array(list(zip(L1X1, L1X2)))
L2 = np.array(list(zip(L2X1, L2X2)))

# Stack both classes into one table, appending the class label (1 or 2) as a third column.
data = np.array([list(a) + [1] for a in L1] + [list(a) + [2] for a in L2])
df = pd.DataFrame(data, columns = ["X1", "X2", "y"])

# Per-class mean vectors as (2, 1) columns; groupby orders them by ascending label.
mu_list = df.groupby('y').mean().values
mu_list = [a.reshape(-1, 1) for a in mu_list]

print("MU VALUES")
for i, a in enumerate(mu_list):
    print(f"mu_{i + 1} = {a}")

# NOTE(review): this is the covariance of all samples taken together, not the
# pooled within-class covariance that textbook LDA uses. Left unchanged because
# the results recorded at the end of this file depend on it -- confirm against
# the problem statement.
sigma = df[["X1", "X2"]].cov().values
print("SIGMA")
print(sigma)

# Class priors. value_counts() orders by frequency, so sort by label to keep the
# priors aligned with mu_list (which groupby orders by label).
pi_list = df["y"].value_counts().sort_index().values / len(df)

# Setup grid to plot on; a coarse grid is enough because the boundary is linear.
N = 5
X = np.linspace(min(df["X1"]), max(df["X1"]), N)
Y = np.linspace(min(df["X2"]), max(df["X2"]), N)
X, Y = np.meshgrid(X, Y)

# `height` is the current name of the FacetGrid parameter formerly called `size`.
g = sns.FacetGrid(df, hue="y", height=10).map(plt.scatter,"X1", "X2").add_legend()
ax = g.ax

# Evaluate the boundary function at every grid node, then restore the grid shape.
boundary = np.array([calculate_boundary(np.array([xx,yy]).reshape(-1,1), mu_list, sigma, pi_list)
                    for xx, yy in zip(np.ravel(X), np.ravel(Y))]).reshape(X.shape)

# The decision boundary is the zero level set of the boundary function.
ax.contour(X, Y, boundary, levels = [0])

# Intercept of the boundary line: the boundary function evaluated at the origin.
# (The slope coefficients are the "DIFF" vector printed by calculate_boundary.)
print(calculate_boundary(np.array([0,0]).reshape(-1,1), mu_list, sigma, pi_list))

# The axes show the two features, not the class labels.
ax.set_xlabel('X1')
ax.set_ylabel('X2')

plt.show()

'''
MU VALUES
mu_1 = [[2.088]
 [2.186]]
mu_2 = [[-2.156]
 [-1.72 ]]

SIGMA
[[6.52282667 3.51129111]
 [3.51129111 6.32677889]]

x = [0, 0]^T -> boundary = -.069 and sigma^-1 * delta(mu) = [.4539, .3655]
Boundary Line: X1 * .4539 + X2 * .3655 - .069 = 0
'''
	import numpy as np
	import pandas as pd
	from matplotlib import pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D
	from mpl_toolkits import mplot3d
	import seaborn as sns

	# Initialise seaborn's plot styling once, up front, before any figure is created below.
	sns.set()

	'''
	@ signs were used here to denote matrix multiplication since writing it all out in numpy got verbose
	'''

	def calculate_boundary(X,mu_list,sigma,pi_list):
	    """Evaluate the two-class linear boundary function at one point.

	    The value returned is

	        log(pi_k / pi_l)
	        - 1/2 * (mu_k + mu_l)^T sigma^-1 (mu_k - mu_l)
	        + X^T sigma^-1 (mu_k - mu_l)

	    so the decision boundary is the set of points where it equals zero.

	    Args:
	        X: point to evaluate; the callers in this file pass a column
	            vector built with ``reshape(-1, 1)``.
	        mu_list: exactly two class-mean vectors ``(mu_k, mu_l)``.
	        sigma: covariance matrix; it is inverted on every call.
	        pi_list: exactly two class priors ``(pi_k, pi_l)``.

	    Returns:
	        The first element of the flattened result, i.e. a scalar.

	    Side effects:
	        Prints ``sigma^-1 (mu_k - mu_l)`` prefixed with "DIFF" on each call.
	    """
	    mu_k, mu_l = mu_list
	    pi_k, pi_l = pi_list

	    # Invert once and reuse for both the direction vector and the offset term.
	    sigma_inv = np.linalg.inv(sigma)
	    diff = sigma_inv @ (mu_k - mu_l)
	    print("DIFF", diff)

	    offset = np.log(pi_k / pi_l) - 1/2 * (mu_k + mu_l).T @ sigma_inv @ (mu_k - mu_l)
	    return (offset + X.T @ diff).flatten()[0]


	def LDA_score(X,mu_k,SIGMA,pi_k):
	    """Compute the linear discriminant score of point ``X`` for one class.

	    The score is
	    ``log(pi_k) - 1/2 * mu_k^T SIGMA^-1 mu_k + X^T SIGMA^-1 mu_k``.

	    Args:
	        X: point to score; the caller in this file passes a column vector.
	        mu_k: mean of the class, passed by the caller as a column vector.
	        SIGMA: covariance matrix; it is inverted on every call.
	        pi_k: prior probability of the class.

	    Returns:
	        The first element of the flattened result, i.e. a scalar.
	    """
	    # Invert once and reuse for both terms that need it.
	    sigma_inv = np.linalg.inv(SIGMA)
	    score = np.log(pi_k) - 1/2 * (mu_k).T @ sigma_inv @ (mu_k) + X.T @ sigma_inv @ (mu_k)
	    return score.flatten()[0]

	def predict_LDA_class(X,mu_list,sigma,pi_list):
	    """Return the index of the class whose LDA score for ``X`` is highest.

	    Each class is scored with its own mean and its own prior. (The previous
	    version passed ``pi_list[0]`` for every class, so unequal priors were
	    silently ignored.)

	    Args:
	        X: feature vector of the point to classify; reshaped to a column.
	        mu_list: one mean vector (numpy array) per class.
	        sigma: covariance matrix shared by all classes.
	        pi_list: one prior per class, in the same order as ``mu_list``.

	    Returns:
	        Zero-based position in ``mu_list`` of the best-scoring class, as
	        returned by ``np.argmax``.

	    Raises:
	        ValueError: if ``mu_list`` and ``pi_list`` differ in length.
	    """
	    if len(mu_list) != len(pi_list):
	        raise ValueError(
	            f"mu_list has {len(mu_list)} entries but pi_list has {len(pi_list)}"
	        )

	    point = X.reshape(-1,1)
	    scores_list = [
	        LDA_score(point, mu.reshape(-1,1), sigma, prior)
	        for mu, prior in zip(mu_list, pi_list)
	    ]

	    return np.argmax(scores_list)

	# Training samples: L<label>X<feature> holds one feature's values for one class label.
	L1X1 = np.array([3.81, .23, 3.05, 0.68, 2.67])
	L1X2 = np.array([-.55, 3.37, 3.53, 1.84, 2.74])
	L2X1 = np.array([-2.04, -.72, -2.46, -3.51, -2.05])
	L2X2 = np.array([-1.25, -3.35, -1.31, 0.13, -2.82])

	# Pair the two features up so that each row is one (X1, X2) sample.
	L1 = np.array(list(zip(L1X1, L1X2)))
	L2 = np.array(list(zip(L2X1, L2X2)))

	# Stack both classes into one table, appending the class label (1 or 2) as a third column.
	data = np.array([list(a) + [1] for a in L1] + [list(a) + [2] for a in L2])
	df = pd.DataFrame(data, columns = ["X1", "X2", "y"])

	# Per-class mean vectors as (2, 1) columns; groupby orders them by ascending label.
	mu_list = df.groupby('y').mean().values
	mu_list = [a.reshape(-1, 1) for a in mu_list]

	print("MU VALUES")
	for i, a in enumerate(mu_list):
	    print(f"mu_{i + 1} = {a}")

	# NOTE(review): this is the covariance of all samples taken together, not the
	# pooled within-class covariance that textbook LDA uses. Left unchanged because
	# the results recorded at the end of this file depend on it -- confirm against
	# the problem statement.
	sigma = df[["X1", "X2"]].cov().values
	print("SIGMA")
	print(sigma)

	# Class priors. value_counts() orders by frequency, so sort by label to keep the
	# priors aligned with mu_list (which groupby orders by label).
	pi_list = df["y"].value_counts().sort_index().values / len(df)

	# Setup grid to plot on; a coarse grid is enough because the boundary is linear.
	N = 5
	X = np.linspace(min(df["X1"]), max(df["X1"]), N)
	Y = np.linspace(min(df["X2"]), max(df["X2"]), N)
	X, Y = np.meshgrid(X, Y)

	# `height` is the current name of the FacetGrid parameter formerly called `size`.
	g = sns.FacetGrid(df, hue="y", height=10).map(plt.scatter,"X1", "X2").add_legend()
	ax = g.ax

	# Evaluate the boundary function at every grid node, then restore the grid shape.
	boundary = np.array([calculate_boundary(np.array([xx,yy]).reshape(-1,1), mu_list, sigma, pi_list)
	                    for xx, yy in zip(np.ravel(X), np.ravel(Y))]).reshape(X.shape)

	# The decision boundary is the zero level set of the boundary function.
	ax.contour(X, Y, boundary, levels = [0])

	# Intercept of the boundary line: the boundary function evaluated at the origin.
	# (The slope coefficients are the "DIFF" vector printed by calculate_boundary.)
	print(calculate_boundary(np.array([0,0]).reshape(-1,1), mu_list, sigma, pi_list))

	# The axes show the two features, not the class labels.
	ax.set_xlabel('X1')
	ax.set_ylabel('X2')

	plt.show()

	'''
	MU VALUES
	mu_1 = [[2.088]
	[2.186]]
	mu_2 = [[-2.156]
	[-1.72 ]]

	SIGMA
	[[6.52282667 3.51129111]
	[3.51129111 6.32677889]]

	x = [0, 0]^T -> boundary = -.069 and sigma^-1 * delta(mu) = [.4539, .3655]
	Boundary Line: X1 * .4539 + X2 * .3655 - .069 = 0
	'''