Created
February 13, 2018 13:48
-
-
Save ffliza/db1c778d8d8f1c877c9010d90b0938e8 to your computer and use it in GitHub Desktop.
PCA with eigen decomposition, Singular value decomposition and matplotlib.mlab PCA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# -*- coding: utf-8 -*- | |
""" | |
@author: Farhana Ferdousi Liza | |
""" | |
from matplotlib.mlab import PCA | |
import numpy as np | |
def centering(data): # standerd data | |
data -= np.mean(data, 0) | |
data /= np.std(data, 0) | |
return data | |
def cov(data): | |
""" | |
Covariance matrix | |
""" | |
return np.dot(data.T, data) / data.shape[0] | |
def pca_ed(data, pc_count = None): # PCA with eigen decomposition | |
""" | |
Principal component analysis using eigen decomposition | |
""" | |
C = cov(data) | |
E, V = np.linalg.eigh(C) | |
key = np.argsort(E)[::-1][:pc_count] # Sort the eigen values in descending order and | |
E, V = E[key], V[:, key] # reorder the eigenvector accordingly | |
U = np.dot(data, V) # this will result in a rotation, we can represent data in the new basis (v1,v2, ..) | |
return V, U, E # returns [coeff, score, latent] in matlab pca | |
def pca_svd(data, pc_count = None): | |
""" | |
Principal component analysis using singular value decomposition | |
data . V = U . S ==> data = U.S.Vt | |
Vt is the matrix that rotate the data from one basis to another | |
""" | |
U, S, Vt = np.linalg.svd(data) # S is already sorted | |
score = U[:, :pc_count] * S[:pc_count] # this will result in a rotation, we can represent data in the new basis (u1,u2, ..) | |
#score = np.dot(data[:,:pc_count], Vt[:,:pc_count].T) # data . (Vt)t | |
return Vt, score, S**2 # to match the result of matplotlib.mlab PCA, it should be S**2/(n_1), n is number of observation | |
def main(): | |
data = np.random.randn(1000,200) | |
data_centered = centering(data) | |
print(data_centered.shape) | |
coeff, score, latent = pca_svd(data_centered, pc_count = (data_centered.shape[1])) | |
print(latent[:10]) | |
print(score[0:10,1]) | |
results = PCA(data_centered, standardize=False) | |
print(results.s[:10]) | |
print(results.Y[0:10,1]) | |
result = PCA(data) | |
print(result.s[:10]) | |
print(result.Y[0:10,1]) | |
#print(help(PCA)) | |
if __name__ == "__main__": | |
main() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment