Skip to content

Instantly share code, notes, and snippets.

@tmramalho
Created April 12, 2015 18:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tmramalho/ea938b7958d2803227f5 to your computer and use it in GitHub Desktop.
Save tmramalho/ea938b7958d2803227f5 to your computer and use it in GitHub Desktop.
Factor analysis and PCA blog post
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, FactorAnalysis, SparsePCA
from sklearn import datasets
# Load the scikit-learn digits dataset: `x` is the feature matrix and `y`
# holds the labels, which are used below to colour the scatter plot.
dset = datasets.load_digits()
x = dset.data
y = dset.target

# Project the data onto two principal components.
model = PCA(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two projected coordinates, one colour per label.
plt.figure(figsize=(3*1.618,3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Set1)
# plt.savefig('digits_PCA.png')

# Dot product of the first two component vectors, then the model's fitted
# noise variance. Single-argument print(...) produces the same output on
# Python 2 and Python 3, unlike the former print statement.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# Generate synthetic data from the current `model`: fit a Gaussian (sample
# mean and covariance) to the latent coordinates of `x`, draw 100 latent
# points from it, and map them back to the original feature space.
v = model.transform(x)
v_samples = np.random.multivariate_normal(np.mean(v, axis=0), np.cov(v.T), size=100)
x_samples = model.inverse_transform(v_samples)

# Draw the samples on an n_row x n_col grid; n_row * n_col equals the number
# of samples drawn above, so every sample gets exactly one panel.
n_row = 5
n_col = 20
plt.figure(figsize=(10, 2.5))
# range() works on both Python 2 and Python 3 (xrange is Python 2 only).
for i in range(n_row*n_col):
    comp = x_samples[i]
    plt.subplot(n_row, n_col, i + 1)  # subplot indices are 1-based
    # Each sample is reshaped to an 8x8 image before display.
    plt.imshow(comp.reshape([8,8]), cmap=plt.cm.gray, interpolation='nearest')
    # Empty tick tuples hide the axis ticks on every panel.
    plt.xticks(())
    plt.yticks(())
plt.tight_layout(h_pad=0, w_pad=0)
plt.savefig('gen_PCA.png')
# Repeat the two-component projection of `x` with factor analysis.
model = FactorAnalysis(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two latent coordinates, coloured by the labels in `y`.
plt.figure(figsize=(3*1.618,3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Set1)
# plt.savefig('digits_FA.png')

# Dot product of the first two component vectors, then the fitted noise
# variance. Single-argument print(...) produces the same output on Python 2
# and Python 3, unlike the former print statement.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# Fit a ten-component sparse PCA to `x`.
model = SparsePCA(n_components=10)
x_reduced = model.fit_transform(x)

# Scatter the first two of the ten projected coordinates, coloured by `y`.
plt.figure(figsize=(3*1.618,3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Set1)
plt.savefig('digits_SPCA.png')

# Dot product of the first two component vectors. Single-argument print(...)
# produces the same output on Python 2 and Python 3.
print(np.dot(model.components_[0], model.components_[1]))

# Show every component as an 8x8 image; n_row * n_col matches the
# n_components=10 requested above.
n_row = 5
n_col = 2
plt.figure(figsize=(6, 15))
# range() works on both Python 2 and Python 3 (xrange is Python 2 only).
for i in range(n_row*n_col):
    comp = model.components_[i]
    plt.subplot(n_row, n_col, i + 1)  # subplot indices are 1-based
    plt.imshow(comp.reshape([8,8]), cmap=plt.cm.gray, interpolation='nearest')
    # Empty tick tuples hide the axis ticks on every panel.
    plt.xticks(())
    plt.yticks(())
plt.tight_layout()
plt.savefig('digits_SPCA_rec.png')
import numpy as np
import matplotlib.pyplot as plt
from scipy import linalg
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.covariance import ShrunkCovariance, LedoitWolf
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import GridSearchCV
from sklearn import datasets
# Load the scikit-learn iris dataset: `x` is the feature matrix and `y`
# holds the labels, which are used below to colour the scatter plots.
iris = datasets.load_iris()
x = iris.data
y = iris.target

# Project the data onto two principal components.
model = PCA(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two projected coordinates, one colour per label.
plt.figure(figsize=(3*1.618,3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Paired)
plt.savefig('iris_PCA.png')

# Dot product of the first two component vectors, then the model's fitted
# noise variance. Single-argument print(...) produces the same output on
# Python 2 and Python 3, unlike the former print statement.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# Single-component PCA of `x`, drawn as a narrow horizontal strip: the
# projected coordinate goes on the x-axis and a random offset on the y-axis.
model = PCA(n_components=1)
x_reduced = model.fit_transform(x)

# Gaussian vertical offsets (std 0.1), one per projected point --
# presumably jitter so that overlapping points can be told apart.
y_rand = np.random.normal(loc=0.0, scale=0.1, size=x_reduced.shape)

plt.figure(figsize=(3 * 1.618, 1))
plt.scatter(
    x_reduced[:, 0],
    y_rand,
    c=y,
    cmap=plt.cm.Paired,
)

# Fix the vertical range to [-1, 1], keep ticks only at the two ends and
# hide their labels.
plt.ylim(-1, 1)
plt.yticks([-1, 1])
tick_labels = plt.gca().get_yticklabels()
plt.setp(tick_labels, visible=False)

plt.tight_layout()
plt.savefig('iris_PCA_hist.png')
# Repeat the two-component projection of `x` with factor analysis.
model = FactorAnalysis(n_components=2)
x_reduced = model.fit_transform(x)

# Scatter the two latent coordinates, coloured by the labels in `y`.
plt.figure(figsize=(3*1.618,3))
plt.scatter(x_reduced[:, 0], x_reduced[:, 1], c=y, cmap=plt.cm.Paired)
plt.savefig('iris_FA.png')

# Dot product of the first two component vectors, then the fitted noise
# variance. Single-argument print(...) produces the same output on Python 2
# and Python 3, unlike the former print statement.
print(np.dot(model.components_[0], model.components_[1]))
print(model.noise_variance_)
# Single-factor factor analysis of `x`, drawn as a narrow horizontal strip:
# the latent coordinate goes on the x-axis and a random offset on the y-axis.
model = FactorAnalysis(n_components=1)
x_reduced = model.fit_transform(x)

# Gaussian vertical offsets (std 0.1), one per point -- presumably jitter
# so that overlapping points can be told apart.
y_rand = np.random.normal(scale=0.1, size=x_reduced.shape)
plt.figure(figsize=(3*1.618,1))
plt.scatter(x_reduced[:, 0], y_rand, c=y, cmap=plt.cm.Paired)

# Fix the vertical range to [-1, 1], keep ticks only at the two ends and
# hide their labels.
plt.ylim(-1,1)
plt.yticks([-1,1])
plt.setp(plt.gca().get_yticklabels(), visible=False)
plt.tight_layout()
plt.savefig('iris_FA_hist.png')

# Fitted noise variance of the single-factor model. Single-argument
# print(...) produces the same output on Python 2 and Python 3.
print(model.noise_variance_)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment