Skip to content

Instantly share code, notes, and snippets.

@mrgloom
Created May 23, 2014 10:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrgloom/a8e329b7087281ec4f61 to your computer and use it in GitHub Desktop.
Save mrgloom/a8e329b7087281ec4f61 to your computer and use it in GitHub Desktop.
Naive PCA implementation using NumPy
def pca(data, k):
    """Project ``data`` onto its ``k`` leading principal components.

    The data is mean-centred per column, the covariance matrix of the
    columns is eigen-decomposed, and the ``k`` eigenvectors with the largest
    eigenvalues are used to project the centred data and to reconstruct it.
    Intermediate shapes, the selected indices and the reconstruction error
    are printed as diagnostics.

    Args:
        data: 2-D array-like of shape (M, N): M samples (rows) by
            N features (columns).
        k: Number of principal components to keep, ``1 <= k <= N``.

    Returns:
        Tuple ``(pr, rec, eigenvalues, eigenvectors)`` where
        ``pr`` is the (M, k) projection of the centred data,
        ``rec`` is the (M, N) reconstruction of the centred data from ``pr``,
        ``eigenvalues`` holds the k largest eigenvalues in descending order,
        ``eigenvectors`` is (N, k) with the matching eigenvectors as columns.

    Raises:
        ValueError: If ``data`` is not 2-D or ``k`` is outside ``[1, N]``.
    """
    # Imported locally: the file has no module-level numpy import.
    import numpy as np

    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            "data must be 2-D (samples x features), got %d-D" % data.ndim
        )
    n_features = data.shape[1]
    # Without this check k == 0 would select *all* components, because
    # the slice [-0:] is the whole array.
    if not 1 <= k <= n_features:
        raise ValueError(
            "k must be between 1 and %d, got %r" % (n_features, k)
        )

    # Centre every column on its mean.
    mean = np.mean(data, axis=0)  # length N
    data_c = data - mean  # M x N
    print(data_c.shape)

    # Covariance between columns. np.cov squeezes a single-feature result
    # down to a 0-d array, so restore the N x N shape explicitly.
    cov_data = np.atleast_2d(np.cov(data_c, rowvar=False))
    print(cov_data.shape)

    # The covariance matrix is symmetric, so use eigh: it returns real
    # eigenvalues and orthonormal eigenvectors, which the reconstruction
    # below (multiplication by the transpose) relies on.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_data)
    print(eigenvalues.shape)  # length N
    print(eigenvectors.shape)  # N x N

    # Indices of the k largest eigenvalues, largest first.
    idx = eigenvalues.argsort()[-k:][::-1]
    print(idx)
    eigenvalues = eigenvalues[idx]  # length k
    eigenvectors = eigenvectors[:, idx]  # N x k
    print(eigenvalues.shape)
    print(eigenvectors.shape)

    # Projection: (M x N) . (N x k) = (M x k)
    pr = np.dot(data_c, eigenvectors)
    print(pr.shape)

    # Reconstruction: (M x k) . (k x N) = (M x N)
    rec = np.dot(pr, eigenvectors.T)
    print(rec.shape)

    # Residual between the centred data and its rank-k reconstruction.
    print(data_c - rec)

    return pr, rec, eigenvalues, eigenvectors
# Driver script: load the sample data set and run PCA on it.
# numpy is imported here because the file has no import section; without it
# the np.loadtxt call below raises NameError.
import numpy as np

# data is M x N: the first line of the file is skipped (skiprows=1) and only
# the first three space-separated columns are read.
data = np.loadtxt("data_3d.txt", delimiter=" ", skiprows=1, usecols=(0, 1, 2))
print(data.shape)

# Number of principal components to keep.
k = 2
pca(data, k)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment