Skip to content

Instantly share code, notes, and snippets.

@ffliza
Created February 13, 2018 13:48
Show Gist options
  • Save ffliza/db1c778d8d8f1c877c9010d90b0938e8 to your computer and use it in GitHub Desktop.
Save ffliza/db1c778d8d8f1c877c9010d90b0938e8 to your computer and use it in GitHub Desktop.
PCA with eigen decomposition, Singular value decomposition and matplotlib.mlab PCA
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
@author: Farhana Ferdousi Liza
"""
from matplotlib.mlab import PCA
import numpy as np
def centering(data): # standerd data
data -= np.mean(data, 0)
data /= np.std(data, 0)
return data
def cov(data):
"""
Covariance matrix
"""
return np.dot(data.T, data) / data.shape[0]
def pca_ed(data, pc_count = None): # PCA with eigen decomposition
"""
Principal component analysis using eigen decomposition
"""
C = cov(data)
E, V = np.linalg.eigh(C)
key = np.argsort(E)[::-1][:pc_count] # Sort the eigen values in descending order and
E, V = E[key], V[:, key] # reorder the eigenvector accordingly
U = np.dot(data, V) # this will result in a rotation, we can represent data in the new basis (v1,v2, ..)
return V, U, E # returns [coeff, score, latent] in matlab pca
def pca_svd(data, pc_count = None):
"""
Principal component analysis using singular value decomposition
data . V = U . S ==> data = U.S.Vt
Vt is the matrix that rotate the data from one basis to another
"""
U, S, Vt = np.linalg.svd(data) # S is already sorted
score = U[:, :pc_count] * S[:pc_count] # this will result in a rotation, we can represent data in the new basis (u1,u2, ..)
#score = np.dot(data[:,:pc_count], Vt[:,:pc_count].T) # data . (Vt)t
return Vt, score, S**2 # to match the result of matplotlib.mlab PCA, it should be S**2/(n_1), n is number of observation
def main():
data = np.random.randn(1000,200)
data_centered = centering(data)
print(data_centered.shape)
coeff, score, latent = pca_svd(data_centered, pc_count = (data_centered.shape[1]))
print(latent[:10])
print(score[0:10,1])
results = PCA(data_centered, standardize=False)
print(results.s[:10])
print(results.Y[0:10,1])
result = PCA(data)
print(result.s[:10])
print(result.Y[0:10,1])
#print(help(PCA))
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment