Last active
October 29, 2022 20:48
-
-
Save BALaka-18/a0351a51cbf941ffc526d2ae84275a58 to your computer and use it in GitHub Desktop.
Implementing Principal Component Analysis from scratch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Question : Create a python class PCA in “pca.py” to implement PCA (Principle component analysis). | |
The deliverable is a class that can be used as follows: | |
from pca import PCA | |
… . | |
… . | |
pca_model = PCA(n_component=2) | |
pca_model.fit(x) | |
print(pca_model.variance_ratio) | |
print(pca_model.transform(data))''' | |
from numpy import mean,std,cov | |
from numpy.linalg import eig | |
import scipy.linalg as la | |
import numpy as np | |
import pandas | |
class PCA: | |
# Initialize | |
def __init__(self, n_components=None, whiten=False): | |
self.n_components = n_components | |
self.whiten = bool(whiten) | |
# .fit() | |
def fit(self, X): | |
r,c = X.shape | |
# Subtracting mean to center the data/Mean normalization | |
self.mn = mean(X,axis=0) | |
X = X - self.mn | |
# Feature scaling if necessary | |
if self.whiten: | |
self.stdv = std(X,axis=0) | |
X = X/self.stdv | |
# MAIN : PERFORMING SVD/EIGEN DECOMPOSITION OF X | |
# Step 1: | |
A = np.array(X) | |
C = cov(A.T) # Calculating covariance matrix | |
# Calculating eigen values and eigen vectors | |
self.eigval, self.eigvect = eig(C) | |
# Step 2: | |
# Truncating for dimensionality reduction | |
if self.n_components is not None: | |
self.eigval = self.eigval[:self.n_components] | |
self.eigvect = self.eigvect[:, :self.n_components] | |
# Step 3: | |
# Sorting the eigen values and vectors in descending order | |
desc_ord = np.flip(np.argsort(self.eigval)) # Indices returned are for ascending order. Flipping to return indices in descending order. | |
self.eigval = self.eigval[desc_ord] | |
self.eigvect = self.eigvect[:, desc_ord] | |
return self | |
# Transforming the original matrix(of features) to the reduced form. | |
# .transform() | |
def transform(self, X): | |
X = X - self.mn | |
if self.whiten: | |
X = X/self.stdv | |
return X @ self.eigvect # Step 4 | |
# Defining the property explained_variance_ratio. This is not a method | |
# .explained_variance_ratio | |
@property | |
def variance_ratio(self): | |
return (self.eigval/np.sum(self.eigval))*100 # How much each eigen value contributes to the variance in the data. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment