Skip to content

Instantly share code, notes, and snippets.

@nithyadurai87
Created April 4, 2019 06:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nithyadurai87/20d18bbda53e43de19222e24d330a398 to your computer and use it in GitHub Desktop.
Save nithyadurai87/20d18bbda53e43de19222e24d330a398 to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Project the training split of the flowers dataset onto its first two
# principal components and draw a scatter plot coloured by flower class.

# Every column except the last is treated as a feature; the 'Flower' column
# holds the class label.
# NOTE(review): this assumes 'Flower' is the last column of flowers.csv --
# confirm against the data file.
df = pd.read_csv('./flowers.csv')
X = df.iloc[:, :-1]
y = df['Flower']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Standardise the training features before PCA; the scaler returns a plain
# array, so the original row index of X_train is lost at this point.
pca = PCA(n_components=2)
x = StandardScaler().fit_transform(X_train)
new_x = pd.DataFrame(data=pca.fit_transform(x), columns=['x1', 'x2'])

# Pair each projected point with the label of the SAME training row.
# new_x carries a fresh 0..n-1 index while y_train keeps the shuffled original
# index, and pd.concat(axis=1) aligns on index -- so the labels' index must be
# reset first. Concatenating with df[['Flower']] instead would attach the
# labels of unrelated rows and append all-NaN rows for the rest of the file.
df2 = pd.concat([new_x, y_train.reset_index(drop=True)], axis=1)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('x1', fontsize=15)
ax.set_ylabel('x2', fontsize=15)
ax.set_title('2 Components', fontsize=20)

# One scatter series per flower class; the label on each series keeps the
# legend entries tied to the right colour regardless of call order.
for flower, colour in zip(['Rose', 'Jasmin', 'Lotus'], ['g', 'b', 'r']):
    in_class = df2['Flower'] == flower
    ax.scatter(df2.loc[in_class, 'x1'], df2.loc[in_class, 'x2'],
               c=colour, label=flower)
ax.legend()
ax.grid()
plt.show()

# Diagnostics: variance captured by each component, and the column layout
# before and after the projection.
print(pca.explained_variance_ratio_)
print(df.columns)
print(df2.columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment