# mw3i/pca_in_numpy.py

## pca_in_numpy.py
'''
Implementation of PCA with Numpy (using covariance), based on this tutorial by Sebastian Raschka: https://sebastianraschka.com/Articles/2014_pca_step_by_step.html
'''
import numpy as np

def get_components(data: np.ndarray) -> np.ndarray:
    '''
    Compute the principal axes of `data` from its covariance matrix.

    Parameters
    ----------
    data : np.ndarray
        Array with one row per instance and one column per feature.

    Returns
    -------
    np.ndarray
        Square (n_features x n_features) real array whose columns are unit-length
        eigenvectors of the covariance matrix, ordered by eigenvalue from largest
        to smallest. The sign of each column is arbitrary.
    '''
    # np.cov expects variables in rows (hence the transpose) and squeezes a
    # single-feature result down to a 0-d array, so restore the 2-D shape
    cov_mat = np.atleast_2d(np.cov(data.T)) # <-- get the covariance matrix

    ## calculate eigenvalues of the covariance matrix
    # the covariance matrix is symmetric, so use eigh: unlike the general-purpose eig,
    # it always returns real eigenvalues and orthonormal eigenvectors
    eig_val, eig_vec = np.linalg.eigh(cov_mat)

    # sort components, largest to smallest
    idx_sort = np.flip(eig_val.argsort()) # <-- get ordering of eigenvectors: largest to smallest
    components = eig_vec[:,idx_sort]
    return components


## run example:
if __name__ == '__main__':

    ##__Generate Data
    data = np.random.normal(0,1,[100,3]) # <-- generates random data (assuming rows are instances, & columns are "features")

    ##__Get Components
    components = get_components(data)

    ##__Transform data using top 2 components (ie, matmul)
    num_components = 2
    # the components come from the covariance matrix, which is computed about the
    # feature means; project the mean-centered data so the scores are centered too
    centered_data = data - data.mean(axis=0)
    transformed_data = centered_data @ components[:,:num_components]
	'''
	Implementation of PCA with Numpy (using covariance), based on this tutorial by Sebastian Raschka: https://sebastianraschka.com/Articles/2014_pca_step_by_step.html
	'''
	import numpy as np

	def get_components(data: np.ndarray) -> np.ndarray:
	    '''
	    Compute the principal axes of `data` from its covariance matrix.

	    Parameters
	    ----------
	    data : np.ndarray
	        Array with one row per instance and one column per feature.

	    Returns
	    -------
	    np.ndarray
	        Square (n_features x n_features) real array whose columns are unit-length
	        eigenvectors of the covariance matrix, ordered by eigenvalue from largest
	        to smallest. The sign of each column is arbitrary.
	    '''
	    # np.cov expects variables in rows (hence the transpose) and squeezes a
	    # single-feature result down to a 0-d array, so restore the 2-D shape
	    cov_mat = np.atleast_2d(np.cov(data.T)) # <-- get the covariance matrix

	    ## calculate eigenvalues of the covariance matrix
	    # the covariance matrix is symmetric, so use eigh: unlike the general-purpose eig,
	    # it always returns real eigenvalues and orthonormal eigenvectors
	    eig_val, eig_vec = np.linalg.eigh(cov_mat)

	    # sort components, largest to smallest
	    idx_sort = np.flip(eig_val.argsort()) # <-- get ordering of eigenvectors: largest to smallest
	    components = eig_vec[:,idx_sort]
	    return components


	## run example:
	if __name__ == '__main__':

	    ##__Generate Data
	    data = np.random.normal(0,1,[100,3]) # <-- generates random data (assuming rows are instances, & columns are "features")

	    ##__Get Components
	    components = get_components(data)

	    ##__Transform data using top 2 components (ie, matmul)
	    num_components = 2
	    # the components come from the covariance matrix, which is computed about the
	    # feature means; project the mean-centered data so the scores are centered too
	    centered_data = data - data.mean(axis=0)
	    transformed_data = centered_data @ components[:,:num_components]