Instantly share code, notes, and snippets.

Embed
What would you like to do?
Some simple experiments with PCA and PLS for feature extraction.
import numpy
from sklearn.datasets import load_iris
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_validation import KFold
from sklearn.svm import LinearSVC
from sklearn.metrics import zero_one_loss
dataset = load_iris()
X = dataset["data"]
y = dataset["target"]
# Center each feature and scale the variance to be unitary
X = preprocessing.scale(X)
# Compute the variance for each column
print(numpy.var(X, 0).sum())
# Now use PCA using 3 components
pca = PCA(3)
X2 = pca.fit_transform(X)
print(numpy.var(X2, 0).sum())
pls = PLSRegression(3)
pls.fit(X, y)
X2 = pls.transform(X)
print(numpy.var(X2, 0).sum())
# Make predictions using an SVM with PCA and PLS
pca_error = 0
pls_error = 0
n_folds = 10
svc = LinearSVC()
for train_inds, test_inds in KFold(X.shape[0], n_folds=n_folds):
X_train, X_test = X[train_inds], X[test_inds]
y_train, y_test = y[train_inds], y[test_inds]
# Use PCA and then classify using an SVM
X_train2 = pca.fit_transform(X_train)
X_test2 = pca.transform(X_test)
svc.fit(X_train2, y_train)
y_pred = svc.predict(X_test2)
pca_error += zero_one_loss(y_test, y_pred)
# Use PLS and then classify using an SVM
X_train2, y_train2 = pls.fit_transform(X_train, y_train)
X_test2 = pls.transform(X_test)
svc.fit(X_train2, y_train)
y_pred = svc.predict(X_test2)
pls_error += zero_one_loss(y_test, y_pred)
print(pca_error/n_folds)
print(pls_error/n_folds)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment