Skip to content

Instantly share code, notes, and snippets.

@shikhargoswami
Created February 4, 2020 21:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shikhargoswami/bb66f7795bc2e76aa9654d62273c5de6 to your computer and use it in GitHub Desktop.
Save shikhargoswami/bb66f7795bc2e76aa9654d62273c5de6 to your computer and use it in GitHub Desktop.
This script is an implementation of Principal Component Analysis
import numpy as np
from sklearn.preprocessing import StandardScaler
class PCA:
    """Principal Component Analysis on standardized features.

    Every feature is standardized (zero mean, unit population variance)
    and the data is then projected onto the leading right singular
    vectors of the standardized matrix, which are the eigenvectors of
    its covariance matrix ordered by decreasing explained variance.

    Attributes:
        n_components: Number of principal components to keep. ``None``
            keeps two components (the historical behaviour of this
            class), capped at ``min(n_samples, n_features)``.
        variance_ratio: After ``fit``, an array holding the fraction of
            the total variance explained by every component, in
            decreasing order. It is ``0`` before the first fit.
        components: After ``fit``, an array of shape
            ``(n_components, n_features)`` whose rows are the retained
            principal axes. It is ``None`` before the first fit.
    """

    def __init__(self, n_components=None):
        self.n_components = n_components
        self.variance_ratio = 0
        self.components = None

    def transform(self, X_data):
        """Fit the model on ``X_data`` and return its projection.

        This delegates to ``fit``, so the model is re-estimated from
        ``X_data`` on every call; a previous fit is not reused.
        """
        return self.fit(X_data)

    def fit(self, X):
        """Estimate the principal components of ``X`` and project it.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples, n_components)`` holding the
            standardized data expressed in the principal-axis basis.

        Raises:
            ValueError: If ``X`` is not two-dimensional, has fewer than
                two samples, or ``n_components`` lies outside
                ``[1, min(n_samples, n_features)]``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional, got %d dimension(s)" % X.ndim
            )
        n_samples, n_features = X.shape
        if n_samples < 2:
            # The variance estimate divides by (n_samples - 1).
            raise ValueError("PCA needs at least two samples")

        max_components = min(n_samples, n_features)
        if self.n_components is None:
            n_components = min(2, max_components)
        else:
            n_components = self.n_components
            if not 1 <= n_components <= max_components:
                raise ValueError(
                    "n_components=%r must be between 1 and %d"
                    % (n_components, max_components)
                )

        X_std = self._standardize(X)

        # A single reduced SVD yields both the principal axes (rows of vt)
        # and the variances (squared singular values), already sorted in
        # decreasing order. full_matrices=False avoids materialising an
        # n_samples x n_samples matrix that is never used.
        _, singular_values, vt = np.linalg.svd(X_std, full_matrices=False)

        explained_var = singular_values ** 2 / (n_samples - 1)
        total_var = explained_var.sum()
        if total_var > 0:
            self.variance_ratio = explained_var / total_var
        else:
            # Every feature is constant: there is no variance to explain.
            self.variance_ratio = np.zeros_like(explained_var)

        components = vt[:n_components]
        # Singular vectors are only defined up to sign. Make the loading
        # with the largest magnitude positive so results are reproducible.
        pivots = np.argmax(np.abs(components), axis=1)
        signs = np.sign(components[np.arange(n_components), pivots])
        signs[signs == 0] = 1.0
        components = components * signs[:, np.newaxis]

        self.components = components
        return X_std.dot(components.T)

    @staticmethod
    def _standardize(X):
        """Return ``X`` with zero-mean, unit-variance columns.

        Uses the population standard deviation (``ddof=0``). Columns
        that are (numerically) constant are only centred, not scaled,
        so they do not cause a division by zero.
        """
        mean = X.mean(axis=0)
        scale = X.std(axis=0)
        eps = np.finfo(X.dtype).eps
        constant = scale <= 10 * eps * np.maximum(1.0, np.abs(mean))
        scale[constant] = 1.0
        return (X - mean) / scale
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment