mrgloom/gist:a8e329b7087281ec4f61

## gistfile1.txt
def pca(data, k):
	"""Print diagnostics for a rank-``k`` PCA reconstruction of ``data``.

	The columns of ``data`` are mean-centred, the covariance matrix of the
	columns is eigen-decomposed, the centred data is projected onto the
	``k`` eigenvectors with the largest eigenvalues and then mapped back
	into the original N-dimensional space.  Along the way the function
	prints, in order: the shapes of the centred data, the covariance
	matrix, all eigenvalues and all eigenvectors; the indices of the
	selected eigenvalues; the shapes of the selected eigenvalues and
	eigenvectors, of the projection and of the reconstruction; and finally
	the residual ``centred data - reconstruction``.

	Args:
		data: 2-D array of shape (M, N): M observations of N variables.
		k: Number of principal components to keep.  ``k == 0`` keeps none
			(the residual is then the centred data itself); a ``k`` larger
			than N keeps all N components.

	Returns:
		None.  All results are reported on standard output.
	"""
	# Centre every column on its mean (the mean vector is N long).
	centred = data - np.mean(data, axis=0)
	print(centred.shape)

	# N x N covariance of the columns.  np.cov collapses the single-column
	# case to a 0-d array, so force a matrix for the decomposition below.
	cov = np.atleast_2d(np.cov(centred, rowvar=False))
	print(cov.shape)

	# A covariance matrix is symmetric, so use eigh rather than eig: it
	# always returns real eigenvalues (in ascending order) and orthonormal
	# eigenvectors, which the V.T-based reconstruction below relies on.
	eigenvalues, eigenvectors = np.linalg.eigh(cov)
	print(eigenvalues.shape)  # N long
	print(eigenvectors.shape)  # N x N

	# Indices of the k largest eigenvalues, largest first.  Reverse *before*
	# slicing: ``argsort()[-k:]`` would select every index when k == 0.
	idx = eigenvalues.argsort()[::-1][:k]
	print(idx)

	eigenvalues = eigenvalues[idx]  # k long
	eigenvectors = eigenvectors[:, idx]  # N x k
	print(eigenvalues.shape)
	print(eigenvectors.shape)

	# Projection onto the kept components: (M N) * (N k) = (M k)
	projection = np.dot(centred, eigenvectors)
	print(projection.shape)

	# Reconstruction in the centred space: (M k) * (N k).T = (M N)
	reconstruction = np.dot(projection, eigenvectors.T)
	print(reconstruction.shape)

	# Residual left after keeping only k components.
	print(centred - reconstruction)

# numpy must be in scope as ``np`` for the loader below and for pca().
import numpy as np

# M x N: read columns 0-2 of the space-delimited file, skipping its first line.
data = np.loadtxt("data_3d.txt", delimiter=" ", skiprows=1, usecols=(0, 1, 2))
print(data.shape)

k = 2  # number of principal components to keep
pca(data, k)
	def pca(data, k):
		"""Print diagnostics for a rank-``k`` PCA reconstruction of ``data``.

		The columns of ``data`` are mean-centred, the covariance matrix of the
		columns is eigen-decomposed, the centred data is projected onto the
		``k`` eigenvectors with the largest eigenvalues and then mapped back
		into the original N-dimensional space.  Along the way the function
		prints, in order: the shapes of the centred data, the covariance
		matrix, all eigenvalues and all eigenvectors; the indices of the
		selected eigenvalues; the shapes of the selected eigenvalues and
		eigenvectors, of the projection and of the reconstruction; and finally
		the residual ``centred data - reconstruction``.

		Args:
			data: 2-D array of shape (M, N): M observations of N variables.
			k: Number of principal components to keep.  ``k == 0`` keeps none
				(the residual is then the centred data itself); a ``k`` larger
				than N keeps all N components.

		Returns:
			None.  All results are reported on standard output.
		"""
		# Centre every column on its mean (the mean vector is N long).
		centred = data - np.mean(data, axis=0)
		print(centred.shape)

		# N x N covariance of the columns.  np.cov collapses the single-column
		# case to a 0-d array, so force a matrix for the decomposition below.
		cov = np.atleast_2d(np.cov(centred, rowvar=False))
		print(cov.shape)

		# A covariance matrix is symmetric, so use eigh rather than eig: it
		# always returns real eigenvalues (in ascending order) and orthonormal
		# eigenvectors, which the V.T-based reconstruction below relies on.
		eigenvalues, eigenvectors = np.linalg.eigh(cov)
		print(eigenvalues.shape)  # N long
		print(eigenvectors.shape)  # N x N

		# Indices of the k largest eigenvalues, largest first.  Reverse *before*
		# slicing: ``argsort()[-k:]`` would select every index when k == 0.
		idx = eigenvalues.argsort()[::-1][:k]
		print(idx)

		eigenvalues = eigenvalues[idx]  # k long
		eigenvectors = eigenvectors[:, idx]  # N x k
		print(eigenvalues.shape)
		print(eigenvectors.shape)

		# Projection onto the kept components: (M N) * (N k) = (M k)
		projection = np.dot(centred, eigenvectors)
		print(projection.shape)

		# Reconstruction in the centred space: (M k) * (N k).T = (M N)
		reconstruction = np.dot(projection, eigenvectors.T)
		print(reconstruction.shape)

		# Residual left after keeping only k components.
		print(centred - reconstruction)

	# numpy must be in scope as ``np`` for the loader below and for pca().
	import numpy as np

	# M x N: read columns 0-2 of the space-delimited file, skipping its first line.
	data = np.loadtxt("data_3d.txt", delimiter=" ", skiprows=1, usecols=(0, 1, 2))
	print(data.shape)

	k = 2  # number of principal components to keep
	pca(data, k)