# Factor analysis and PCA blog post
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.decomposition import PCA, FactorAnalysis, SparsePCA | |
from sklearn import datasets | |
# Load the digits dataset bundled with scikit-learn: features in x, labels in y.
dset = datasets.load_digits()
x = dset.data
y = dset.target

# Reduce the data to two principal components.
model = PCA(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two reduced coordinates, coloured by target label.
plt.figure(figsize=(3*1.618, 3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Set1)
# plt.savefig('digits_PCA.png')

# Print the dot product of the first two component vectors, then the fitted
# noise variance. Single-argument print(...) runs on Python 2 and Python 3.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# Transform the data with the fitted model, draw 100 samples from a
# multivariate normal with the mean and covariance of the transformed data,
# and map those samples back with inverse_transform.
v = model.transform(x)
v_samples = np.random.multivariate_normal(np.mean(v, axis=0), np.cov(v.T), size=100)
x_samples = model.inverse_transform(v_samples)

# Draw each generated sample as an 8x8 grayscale image on a 5 x 20 grid
# (n_row * n_col == 100, matching the number of samples drawn above).
n_row = 5
n_col = 20
plt.figure(figsize=(10, 2.5))
# range() instead of xrange() so the loop also runs on Python 3.
for i in range(n_row * n_col):
    comp = x_samples[i]
    plt.subplot(n_row, n_col, i + 1)
    plt.imshow(comp.reshape([8, 8]), cmap=plt.cm.gray, interpolation='nearest')
    # Empty tick tuples remove the axis ticks from the current tile.
    plt.xticks(())
    plt.yticks(())
# NOTE(review): the original indentation was lost; the layout and save calls
# are assumed to run once, after the grid is complete -- confirm.
plt.tight_layout(h_pad=0, w_pad=0)
plt.savefig('gen_PCA.png')
# Repeat the two-component reduction with factor analysis.
model = FactorAnalysis(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two reduced coordinates, coloured by target label.
plt.figure(figsize=(3*1.618, 3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Set1)
# plt.savefig('digits_FA.png')

# Print the dot product of the first two component vectors, then the fitted
# noise variance. Single-argument print(...) runs on Python 2 and Python 3.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# Fit a 10-component sparse PCA and transform the data with it.
model = SparsePCA(n_components=10)
x_reduced = model.fit_transform(x)

# Scatter only the first two of the ten reduced coordinates, coloured by label.
plt.figure(figsize=(3*1.618, 3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Set1)
plt.savefig('digits_SPCA.png')

# Print the dot product of the first two component vectors.
# Single-argument print(...) runs on Python 2 and Python 3.
print(np.dot(model.components_[0], model.components_[1]))
# Draw the first n_row * n_col (= 10) fitted components as 8x8 grayscale
# images on a 5 x 2 grid.
n_row = 5
n_col = 2
plt.figure(figsize=(6, 15))
# range() instead of xrange() so the loop also runs on Python 3.
for i in range(n_row * n_col):
    comp = model.components_[i]
    plt.subplot(n_row, n_col, i + 1)
    plt.imshow(comp.reshape([8, 8]), cmap=plt.cm.gray, interpolation='nearest')
    # Empty tick tuples remove the axis ticks from the current tile.
    plt.xticks(())
    plt.yticks(())
# NOTE(review): the original indentation was lost; the layout and save calls
# are assumed to run once, after the grid is complete -- confirm.
plt.tight_layout()
plt.savefig('digits_SPCA_rec.png')
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy import linalg | |
from sklearn.decomposition import PCA, FactorAnalysis | |
from sklearn.covariance import ShrunkCovariance, LedoitWolf | |
from sklearn.cross_validation import cross_val_score | |
from sklearn.grid_search import GridSearchCV | |
from sklearn import datasets | |
# Load the iris dataset bundled with scikit-learn: features in x, labels in y.
iris = datasets.load_iris()
x = iris.data
y = iris.target

# Reduce the data to two principal components.
model = PCA(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two reduced coordinates, coloured by target label, and save.
plt.figure(figsize=(3*1.618, 3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Paired)
plt.savefig('iris_PCA.png')

# Print the dot product of the first two component vectors, then the fitted
# noise variance. Single-argument print(...) runs on Python 2 and Python 3.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# One-component PCA drawn as a strip: the single reduced coordinate goes on
# the x axis and random normal offsets (scale 0.1) are used as y values.
model = PCA(n_components=1)
x_reduced = model.fit_transform(x)
y_rand = np.random.normal(scale=0.1, size=x_reduced.shape)

plt.figure(figsize=(3*1.618, 1))
first_coordinate = x_reduced[:, 0]
plt.scatter(first_coordinate, y_rand, c=y, cmap=plt.cm.Paired)

# Fix the vertical range to [-1, 1], keep ticks only at the limits and hide
# their labels.
plt.ylim(-1, 1)
plt.yticks([-1, 1])
y_tick_labels = plt.gca().get_yticklabels()
plt.setp(y_tick_labels, visible=False)

plt.tight_layout()
plt.savefig('iris_PCA_hist.png')
# Repeat the two-component reduction with factor analysis.
model = FactorAnalysis(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two reduced coordinates, coloured by target label, and save.
plt.figure(figsize=(3*1.618, 3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Paired)
plt.savefig('iris_FA.png')

# Print the dot product of the first two component vectors, then the fitted
# noise variance. Single-argument print(...) runs on Python 2 and Python 3.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# One-factor factor analysis drawn as a strip: the single reduced coordinate
# goes on the x axis and random normal offsets (scale 0.1) are used as y values.
model = FactorAnalysis(n_components=1)
x_reduced = model.fit_transform(x)
y_rand = np.random.normal(scale=0.1, size=x_reduced.shape)

plt.figure(figsize=(3*1.618, 1))
plt.scatter(x_reduced[:, 0], y_rand, c=y, cmap=plt.cm.Paired)
# Fix the vertical range to [-1, 1], keep ticks only at the limits and hide
# their labels.
plt.ylim(-1, 1)
plt.yticks([-1, 1])
plt.setp(plt.gca().get_yticklabels(), visible=False)
plt.tight_layout()
plt.savefig('iris_FA_hist.png')

# Print the fitted noise variance.
# Single-argument print(...) runs on Python 2 and Python 3.
print(model.noise_variance_)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment