Digits analysis

Created April 12, 2015 18:22
Factor analysis and PCA blog post
 # Digits dataset: 2-D embeddings with PCA, factor analysis and sparse PCA.
#
# Uses print() with a single argument and range(), so the syntax is valid on
# both Python 2 and Python 3 (the original used the Python 2-only print
# statement and xrange).
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, FactorAnalysis, SparsePCA
from sklearn import datasets

GOLDEN_RATIO = 1.618
DIGIT_SHAPE = (8, 8)  # every row shown below is reshaped to 8x8 for display


def _scatter_embedding(points, labels, filename=None):
    """Scatter the first two columns of *points*, coloured by *labels*.

    A new figure is always created; it is written to *filename* only when
    one is given.
    """
    plt.figure(figsize=(3 * GOLDEN_RATIO, 3))
    plt.scatter(points[:, 0], points[:, 1], c=labels, cmap=plt.cm.Set1)
    if filename is not None:
        plt.savefig(filename)


def _save_image_grid(images, n_row, n_col, figsize, filename, **layout_kwargs):
    """Draw the first ``n_row * n_col`` rows of *images* as 8x8 grey tiles.

    Extra keyword arguments are forwarded to ``plt.tight_layout`` and the
    finished figure is saved to *filename*.
    """
    plt.figure(figsize=figsize)
    for index, image in enumerate(images[:n_row * n_col]):
        plt.subplot(n_row, n_col, index + 1)
        plt.imshow(image.reshape(DIGIT_SHAPE), cmap=plt.cm.gray,
                   interpolation='nearest')
        # Empty tick tuples remove the axis ticks from each tile.
        plt.xticks(())
        plt.yticks(())
    plt.tight_layout(**layout_kwargs)
    plt.savefig(filename)


dset = datasets.load_digits()
x = dset.data
y = dset.target

# --- PCA, two components ---------------------------------------------------
model = PCA(n_components=2)
x_reduced = model.fit_transform(x)
# Saving to 'digits_PCA.png' was commented out in the original; left disabled.
_scatter_embedding(x_reduced, y)
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)

# Sample new points in the 2-D PCA space from a Gaussian with the mean and
# covariance of the projected data, then map them back to pixel space.
v = model.transform(x)
v_samples = np.random.multivariate_normal(
    np.mean(v, axis=0), np.cov(v.T), size=100)
x_samples = model.inverse_transform(v_samples)

n_row = 5
n_col = 20
_save_image_grid(x_samples, n_row, n_col, (10, 2.5), 'gen_PCA.png',
                 h_pad=0, w_pad=0)

# --- Factor analysis, two components ----------------------------------------
model = FactorAnalysis(n_components=2)
x_reduced = model.fit_transform(x)
# Saving to 'digits_FA.png' was commented out in the original; left disabled.
_scatter_embedding(x_reduced, y)
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)

# --- Sparse PCA, ten components ----------------------------------------------
model = SparsePCA(n_components=10)
x_reduced = model.fit_transform(x)
_scatter_embedding(x_reduced, y, 'digits_SPCA.png')
print(np.dot(model.components_[0], model.components_[1]))

# One tile per sparse component (5 x 2 = 10 components).
n_row = 5
n_col = 2
_save_image_grid(model.components_, n_row, n_col, (6, 15),
                 'digits_SPCA_rec.png')
 # Iris dataset: 1-D and 2-D embeddings with PCA and factor analysis.
#
# Uses print() with a single argument, so the syntax is valid on both
# Python 2 and Python 3 (the original used the Python 2-only print statement).
import numpy as np
import matplotlib.pyplot as plt
from scipy import linalg
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.covariance import ShrunkCovariance, LedoitWolf
try:
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import GridSearchCV
except ImportError:
    # Older scikit-learn releases only provide the pre-model_selection
    # module names that the original script imported.
    from sklearn.cross_validation import cross_val_score
    from sklearn.grid_search import GridSearchCV
from sklearn import datasets

# NOTE: linalg, ShrunkCovariance, LedoitWolf, cross_val_score and GridSearchCV
# are imported but not used anywhere in this script; the imports are kept
# as they were in the original.

GOLDEN_RATIO = 1.618


def _scatter_2d(points, labels, filename):
    """Scatter the first two columns of *points* by *labels* and save it."""
    plt.figure(figsize=(3 * GOLDEN_RATIO, 3))
    plt.scatter(points[:, 0], points[:, 1], c=labels, cmap=plt.cm.Paired)
    plt.savefig(filename)


def _strip_plot(points, labels, filename):
    """Plot a one-column embedding along x with random vertical offsets.

    The offsets are drawn from a normal distribution with standard
    deviation 0.1 and the same shape as *points*; they are returned so the
    caller can keep them.
    """
    offsets = np.random.normal(scale=0.1, size=points.shape)
    plt.figure(figsize=(3 * GOLDEN_RATIO, 1))
    plt.scatter(points[:, 0], offsets, c=labels, cmap=plt.cm.Paired)
    plt.ylim(-1, 1)
    plt.yticks([-1, 1])
    # Keep the y ticks but hide their labels.
    plt.setp(plt.gca().get_yticklabels(), visible=False)
    plt.tight_layout()
    plt.savefig(filename)
    return offsets


iris = datasets.load_iris()
x = iris.data
y = iris.target

# --- PCA ---------------------------------------------------------------------
model = PCA(n_components=2)
x_reduced = model.fit_transform(x)
_scatter_2d(x_reduced, y, 'iris_PCA.png')
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)

model = PCA(n_components=1)
x_reduced = model.fit_transform(x)
y_rand = _strip_plot(x_reduced, y, 'iris_PCA_hist.png')

# --- Factor analysis ---------------------------------------------------------
model = FactorAnalysis(n_components=2)
x_reduced = model.fit_transform(x)
_scatter_2d(x_reduced, y, 'iris_FA.png')
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)

model = FactorAnalysis(n_components=1)
x_reduced = model.fit_transform(x)
y_rand = _strip_plot(x_reduced, y, 'iris_FA_hist.png')
print(model.noise_variance_)