Skip to content

Instantly share code, notes, and snippets.

Last active October 29, 2022 20:48
Show Gist options
  • Save BALaka-18/a0351a51cbf941ffc526d2ae84275a58 to your computer and use it in GitHub Desktop.
Save BALaka-18/a0351a51cbf941ffc526d2ae84275a58 to your computer and use it in GitHub Desktop.
Implementing Principal Component Analysis from scratch
'''Question: Create a Python class PCA in “pca.py” to implement PCA (Principal Component Analysis).
The deliverable is a class that can be used as follows:
from pca import PCA
… .
… .
pca_model = PCA(n_components=2)
'''
from numpy import mean,std,cov
from numpy.linalg import eig
import scipy.linalg as la
import numpy as np
import pandas
class PCA:
    """Principal Component Analysis via eigen-decomposition of the covariance matrix.

    Parameters
    ----------
    n_components : int or None, default None
        Number of leading principal components to keep. ``None`` keeps all.
    whiten : bool, default False
        When true, each centered feature is divided by its standard deviation
        before the covariance matrix is computed (per-feature scaling).

    Attributes set by ``fit``
    -------------------------
    mean_ : per-feature mean of the training data.
    stdv : per-feature standard deviation (only when ``whiten`` is true).
    eigval : retained eigenvalues, sorted in descending order.
    eigvect : matching eigenvectors, one per column.
    total_variance_ : sum of *all* eigenvalues (before truncation).
    """

    def __init__(self, n_components=None, whiten=False):
        self.n_components = n_components
        self.whiten = bool(whiten)

    def fit(self, X):
        """Learn the principal axes of ``X`` and return ``self``.

        ``X`` is a 2-D array-like of shape (n_samples, n_features).

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``n_components`` is smaller than 1.
        """
        A = np.asarray(X, dtype=float)
        if A.ndim != 2:
            raise ValueError("X must be 2-D (n_samples, n_features)")
        if self.n_components is not None and self.n_components < 1:
            raise ValueError("n_components must be a positive integer or None")

        # Center the data (mean normalization).
        self.mean_ = A.mean(axis=0)
        A = A - self.mean_

        # Optional feature scaling.
        if self.whiten:
            stdv = A.std(axis=0)
            # A constant feature has zero spread; dividing by 1 keeps it at
            # zero instead of producing NaN.
            stdv[stdv == 0] = 1.0
            self.stdv = stdv
            A = A / self.stdv

        # Step 1: covariance matrix. atleast_2d covers the single-feature
        # case, where np.cov returns a 0-d array.
        C = np.atleast_2d(np.cov(A.T))

        # The covariance matrix is symmetric, so eigh is the right routine:
        # it guarantees real eigenvalues and orthonormal eigenvectors.
        eigval, eigvect = np.linalg.eigh(C)

        # Step 2: sort eigenpairs by descending eigenvalue. This must happen
        # BEFORE truncation, otherwise arbitrary components are kept.
        desc_ord = np.argsort(eigval)[::-1]
        eigval = eigval[desc_ord]
        eigvect = eigvect[:, desc_ord]

        # Remember the full variance so ratios stay relative to all of it.
        self.total_variance_ = float(np.sum(eigval))

        # Step 3: truncate for dimensionality reduction.
        if self.n_components is not None:
            eigval = eigval[:self.n_components]
            eigvect = eigvect[:, :self.n_components]

        self.eigval = eigval
        self.eigvect = eigvect
        return self

    def transform(self, X):
        """Project ``X`` onto the retained principal components (Step 4).

        Applies the centering (and scaling, if ``whiten``) learned in ``fit``.
        """
        X = X - self.mean_
        if self.whiten:
            X = X / self.stdv
        return X @ self.eigvect

    def variance_ratio(self):
        """Return the percentage of total variance explained by each kept component."""
        # Fall back to the retained eigenvalues when fit() did not record the
        # total (e.g. eigval was assigned by hand).
        total = getattr(self, "total_variance_", None)
        if total is None:
            total = np.sum(self.eigval)
        return (self.eigval / total) * 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment