Skip to content

Instantly share code, notes, and snippets.

@SaiVinay007
Last active November 26, 2018 11:12
Show Gist options
  • Save SaiVinay007/627963da0bb514b72abbccccc4d0a70a to your computer and use it in GitHub Desktop.
Save SaiVinay007/627963da0bb514b72abbccccc4d0a70a to your computer and use it in GitHub Desktop.
PCA
def Reduced_data_set(dataset, final_dimension, plot=False):
    """Project ``dataset`` onto its leading principal components (PCA).

    The data is mean-centered per feature, the covariance matrix of the
    features is eigen-decomposed, and the centered data is projected onto
    the eigenvectors that have the largest eigenvalues.

    Parameters
    ----------
    dataset : array-like with ``.mean`` (e.g. ``numpy.ndarray``)
        2-D data with one sample per row and one feature per column.
    final_dimension : int
        Number of principal components to keep. Values larger than the
        number of features are clamped by the column slice.
    plot : bool, optional
        When true, draw the mean-centered data (first two feature columns;
        the first 50 rows as "Data A", the rest as "Data B") together with
        the eigenvectors scaled by their eigenvalues on the current
        Matplotlib axes. Requires at least two features and expects
        ``plt`` to be available in the enclosing module.

    Returns
    -------
    The centered data projected onto the selected eigenvectors, with
    ``final_dimension`` columns. The sign of each column is arbitrary, as
    is the sign of any eigenvector.
    """
    # Center every feature to zero mean.
    mean_of_data = dataset.mean(0)
    centered_data = dataset - mean_of_data

    # np.cov treats rows as variables, hence the transpose. For a single
    # feature it returns a 0-d array, so promote it back to a 1x1 matrix.
    covariance_matrix = np.atleast_2d(np.cov(centered_data.T))

    # The covariance matrix is symmetric: eigh is the right solver and
    # guarantees real eigenvalues/eigenvectors, whereas the general eig
    # solver may return complex values caused purely by round-off.
    eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)

    # Order the eigenpairs from largest to smallest eigenvalue.
    order = np.argsort(eigen_values)[::-1]
    eigen_values = eigen_values[order]
    eigen_vectors = eigen_vectors[:, order]

    # Project onto the final_dimension eigenvectors with largest eigenvalue.
    reduced = np.matmul(centered_data, eigen_vectors[:, :final_dimension])

    if plot:
        plt.title("Mean Centered Data and the EigenVectors of Covariance Matrix (Scaled by EigenValue)")
        plt.plot(centered_data[:, 0][:50], centered_data[:, 1][:50], 'ro', label=' Data A')
        plt.plot(centered_data[:, 0][50:], centered_data[:, 1][50:], 'go', label=' Data B')
        # Column j of eigen_vectors is scaled by eigen_values[j]; rows 0 and 1
        # then hold the x and y components of every arrow. Passing them
        # explicitly (instead of star-unpacking all rows) keeps the call
        # valid when the data has more than two features.
        scaled_vectors = eigen_values * eigen_vectors
        origin = np.zeros(scaled_vectors.shape[1])
        plt.quiver(origin, origin, scaled_vectors[0], scaled_vectors[1],
                   label='Eigen Vectors of Covariance Matrix',
                   angles='xy', scale_units='xy', scale=2.5)
        plt.legend(loc='best')

    return reduced
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment