Skip to content

Instantly share code, notes, and snippets.

@yuyasugano
Created Nov 23, 2020
Embed
What would you like to do?
3 ways to perform dimensionality reduction in scikit-learn
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Demo: reduce the breast cancer dataset to two principal components and
# evaluate a logistic regression classifier trained on the reduced features.

# Load the features and labels; the original script used X and y without
# ever defining them.
X, y = load_breast_cancer(return_X_y=True)

# Before applying PCA, each feature should be centered (zero mean) and with unit variance
X_normalized = StandardScaler().fit_transform(X)

# Keep the fitted PCA object around so its components can be inspected later.
pca = PCA(n_components=2).fit(X_normalized)
X_pca = pca.transform(X_normalized)
print(X.shape, X_pca.shape)
# Expected output: (569, 30) (569, 2)

# NOTE: the scaler and PCA above are fitted on the full dataset before the
# train/test split, so the test rows influence the projection. That is fine
# for a visualization-style demo, but for an unbiased evaluation fit both
# transformers on the training split only (e.g. inside a sklearn Pipeline).
X_train_pca, X_test_pca, y_train, y_test = train_test_split(X_pca, y, random_state=0)
clf_pca = LogisticRegression(random_state=0)
clf_pca.fit(X_train_pca, y_train)

# Predict once per split and reuse the results for every metric below.
y_train_pred = clf_pca.predict(X_train_pca)
y_test_pred = clf_pca.predict(X_test_pca)

print('%s: %.3f' % ('Logreg Train Accuracy', accuracy_score(y_train, y_train_pred)))
print('%s: %.3f' % ('Logreg Test Accuracy', accuracy_score(y_test, y_test_pred)))
print('%s: %.3f' % ('Logreg Train F1 Score', f1_score(y_train, y_train_pred)))
print('%s: %.3f' % ('Logreg Test F1 Score', f1_score(y_test, y_test_pred)))
print(classification_report(y_test, y_test_pred))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment