Created
February 10, 2012 19:05
-
-
Save stober/1791820 to your computer and use it in GitHub Desktop.
Principal Component Analysis in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Unlike other implementations, this can handle very high-dimensional data (like images).
def compute_pca(data):
    """Compute principal components using the Gram-matrix trick.

    Instead of decomposing the (features x features) scatter matrix, this
    decomposes the much smaller (observations x observations) Gram matrix
    ``datac @ datac.T`` and maps its eigenvectors back to feature space.
    That keeps the cost manageable when each observation has far more
    features than there are observations (e.g. flattened images).

    Args:
        data: 2-D array-like with one observation per row.

    Returns:
        A tuple ``(pcs, m, s)`` where

        * ``pcs`` has one principal direction per row (shape
          ``(n_observations, n_features)``), ordered like ``s``. Rows are
          unit length, except directions whose projection is exactly zero,
          which are returned as all-zero rows instead of NaN.
        * ``m`` is the per-feature mean that was subtracted from ``data``.
        * ``s`` holds the singular values of the Gram matrix in descending
          order (not divided by the number of observations).
    """
    data = np.asarray(data)
    m = np.mean(data, axis=0)
    # Broadcasting subtracts the mean from every observation at once.
    datac = data - m
    T = np.dot(datac, datac.T)
    u, s, _ = np.linalg.svd(T)
    # Columns of u are the eigenvectors of T; projecting each through
    # datac.T gives the matching feature-space direction. Row i of
    # u.T @ datac is exactly datac.T @ u[:, i].
    pcs = np.dot(u.T, datac)
    # The projected vectors are not unit length after multiplication by
    # datac.T, so normalise each row. Centred data always makes T rank
    # deficient, and degenerate input (constant or duplicated rows) can
    # yield exactly-zero projections; skip those rows rather than dividing
    # 0 by 0 and returning NaN.
    norms = np.linalg.norm(pcs, axis=1, keepdims=True)
    pcs = np.divide(pcs, norms, out=np.zeros_like(pcs), where=norms > 0)
    return pcs, m, s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment