Last active
November 26, 2018 11:12
-
-
Save SaiVinay007/627963da0bb514b72abbccccc4d0a70a to your computer and use it in GitHub Desktop.
PCA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Reduced_data_set(dataset, final_dimension, plot=False):
    """Project ``dataset`` onto its leading principal components (PCA).

    Args:
        dataset: Array-like of shape (n_samples, n_features); rows are
            observations and columns are features.
        final_dimension: Number of principal components to keep.
        plot: When true, draw the mean-centered data together with the
            covariance eigenvectors scaled by their eigenvalues. The plot
            uses only the first two feature columns, labels rows ``[:50]``
            as "Data A" and rows ``[50:]`` as "Data B", and expects exactly
            two features for the eigenvector arrows.

    Returns:
        Real-valued array of shape (n_samples, final_dimension) holding the
        centered data expressed in the basis of the eigenvectors with the
        largest eigenvalues. The sign of each component is arbitrary.
    """
    data = np.asarray(dataset, dtype=float)

    # Center every feature on zero mean.
    centered_data = data - data.mean(axis=0)

    # Feature covariance matrix. ``atleast_2d`` keeps the single-feature case
    # (where ``np.cov`` collapses to a 0-d array) usable below.
    covariance_matrix = np.atleast_2d(np.cov(centered_data, rowvar=False))

    # A covariance matrix is symmetric, so use ``eigh``: unlike ``eig`` it
    # always yields real eigenvalues and orthonormal eigenvectors, even when
    # the matrix is rank deficient.
    eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)

    # Order the eigenpairs from largest to smallest eigenvalue.
    order = np.argsort(eigen_values)[::-1]
    eigen_values = eigen_values[order]
    eigen_vectors = eigen_vectors[:, order]

    # Project onto the eigenvectors with the highest eigenvalues.
    reduced = centered_data @ eigen_vectors[:, :final_dimension]

    if plot:
        # Column j of ``scaled`` is eigenvector j stretched by eigenvalue j;
        # row 0 holds the x-components and row 1 the y-components.
        scaled = eigen_values * eigen_vectors
        plt.title("Mean Centered Data and the EigenVectors of Covariance Matrix (Scaled by EigenValue)")
        plt.plot(centered_data[:50, 0], centered_data[:50, 1], 'ro', label=' Data A')
        plt.plot(centered_data[50:, 0], centered_data[50:, 1], 'go', label=' Data B')
        plt.quiver(
            [0, 0],
            [0, 0],
            scaled[0],
            scaled[1],
            label='Eigen Vectors of Covariance Matrix',
            angles='xy',
            scale_units='xy',
            scale=2.5,
        )
        plt.legend(loc='best')

    return reduced
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment