# DrSkippy/dimReduction_SVD-PCA.py

Created May 12, 2012
Demonstrate a simple machine learning problem and dimension reduction with Principal Component Analysis (PCA)
#!/usr/bin/python
# -*- coding: utf-8 -*-
# SEE http://blog.drskippy.net/ for writeup
# Demonstrate simple problem of machine learning and dimension
# reduction with Principal Component Analysis (PCA). Use Singular Value
# Decomposition (SVD) to make this as understandable as possible.
#
# Scott Hendrickson 2012-05-11
#
# Many good references on the mathematics behind SVD and PCA. There are also
# efficient algorithms for calculating the principal components without
# explicitly solving the SVD problem.
#
# See, for example:
# http://en.wikipedia.org/wiki/Singular_value_decomposition
# http://en.wikipedia.org/wiki/Principal_component_analysis
#
# Use the scipy library
from matplotlib.pyplot import *
from scipy import linalg
import numpy as np

########################
# Demo Part 1
########################
# SVD: decompose a matrix X into the product of an orthogonal matrix (W),
# a diagonal matrix (S), and the transpose of an orthogonal matrix (V_t)
# (if it helps, think: basis vectors, scaling, rotation) such that
# X = W x S x V_t.
#
# In machine learning, X_t will be a matrix of samples (4 rows)
# and features (3 columns) for our learning examples; X is its transpose.
X_t = np.matrix([[1, 3, -10],
                 [0, 2, 1],
                 [-1, -3, 9],
                 [0, -2, 0]])
X = X_t.T

# The scipy library makes this step easy
(W, s, V_t) = linalg.svd(X)
S = linalg.diagsvd(s, len(X), len(V_t))
recon = np.dot(np.dot(W, S), V_t)

# Are these equal (to within rounding)?
abs(X - recon)
# maximum error
np.max(abs(X - recon))

# One key to understanding and moving to PCA is
# understanding the diagonal matrix S
S

# Given that the features have zero mean:
[np.mean(i) for i in X]
# s is an ordered vector that tells us the "significance" of each dimension
# in the rotated space. (Yes, I arranged it that way. To be clear,
# you can do SVD on a matrix without zero-mean features, but the dimension
# reduction part we are about to do requires it.)
#
# We can now selectively set lower values of s to
# zero to reduce dimension.
s_red = s.copy()
s_red[2] = 0

# Approximately reconstruct our original matrix, but with
# a reduced-dimension representation
S_red = linalg.diagsvd(s_red, len(X), len(V_t))
S_red
recon_red = np.dot(np.dot(W, S_red), V_t)
abs(X - recon_red)
# maximum error
np.max(abs(X - recon_red))
# ratio of errors
np.max(abs(X - recon)) / np.max(abs(X - recon_red))
# The reconstruction error grows by about 14 orders of magnitude, but this
# turns out to be okay for some machine learning problems.

########################
# Demo Part 2
########################
from sklearn import linear_model
from copy import copy

# Classification problem where points are linearly classifiable
# in 2 dimensions.
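# ---------------------------------------------------------------
# Aside: a minimal, illustrative sketch of the LogisticRegression
# workflow used in Parts 2 and 3 -- fit() learns the model, score()
# reports mean accuracy, predict() assigns class labels. The four-point
# data set here is hypothetical and exists only to show the calls.
toy_X = np.array([[-2.0], [-1.0], [1.0], [2.0]])  # one feature per sample (rows)
toy_y = np.array([0, 0, 1, 1])                    # two linearly separable classes
toy_lr = linear_model.LogisticRegression()
toy_lr.fit(toy_X, toy_y)
toy_lr.score(toy_X, toy_y)          # 1.0 on this separable toy set
toy_lr.predict(np.array([[0.5]]))   # a point on the positive side of the boundary
# ---------------------------------------------------------------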
N = 200
x = np.random.normal(0, 4, N)
y1 = 3 * x + np.random.normal(3, 1, N)
y2 = 3 * x + np.random.normal(-3, 1, N)
y_avg = np.mean(np.append(y1, y2))

figure()
plot(x, y1 - y_avg, 'bo')
plot(x, y2 - y_avg, 'ro')
title('Original data sets (0-average)')

# features x and y are rows in this matrix
X = np.append([x, y1 - y_avg], [x, y2 - y_avg], 1)
X_t = X.T
# y1 group labeled 1; y2 group labeled 0
truth = np.append(np.ones([1, N]), np.zeros([1, N]), 1)

# 2d model works very well
lr = linear_model.LogisticRegression()
lr.fit(np.asarray(X_t), truth[0])
lr.score(np.asarray(X_t), truth[0])
lr.predict(np.asarray(X_t))

# Now try dimension reduction with SVD
(W, s, V_t) = linalg.svd(X)
s

# both transformed representations are the same before trimming:
S = linalg.diagsvd(s, len(X), len(V_t))
np.max(abs(X.T * np.matrix(W) - np.matrix(V_t).T * np.matrix(S).T))

# Now work with the transformed coordinates. It might not have been clear
# from above what the transformed coordinate system was. We can get there
# by either expression checked above; use the second:
X_prime = np.matrix(V_t).T * np.matrix(S).T
x_prime = np.asarray(X_prime.T[0])
y_prime = np.asarray(X_prime.T[1])

figure()
plot(x_prime, y_prime, 'go')
title('Features after SVD Transformation')

# Linearly classifiable in 1-d? Try all new basis directions (extremes of variation)
# Min variation - Training along y-dim nearly perfect
ypt = np.asarray(y_prime.T)
lr.fit(ypt, truth[0])
lr.score(ypt, truth[0])
lr.predict(ypt)

# Max variation - Nothing here
xpt = np.asarray(x_prime.T)
lr.fit(xpt, truth[0])
lr.score(xpt, truth[0])
lr.predict(xpt)

########################
# Demo Part 3
########################
# Use PCA idea to reduce to 1-D
s_red = copy(s)
s_red[1] = 0
S_red = linalg.diagsvd(s_red, len(X), len(V_t))
X_prime = np.matrix(V_t).T * np.matrix(S_red).T
x_prime = np.asarray(X_prime.T[0])
y_prime = np.asarray(X_prime.T[1])

figure()
plot(x_prime, y_prime, 'yo')
title('Reduce S by removing s[1] = %2.5f' % s[1])

# Try all new basis directions (extremes of variation)
# 1-D: Max variation - Training along x-dim performs poorly
xpt = np.asarray(x_prime.T)
lr.fit(xpt, truth[0])
lr.score(xpt, truth[0])
lr.predict(xpt)

# This is the other extreme of the "principal" components
s_red = copy(s)
s_red[0] = 0
S_red = linalg.diagsvd(s_red, len(X), len(V_t))
X_prime = np.matrix(V_t).T * np.matrix(S_red).T
x_prime = np.asarray(X_prime.T[0])
y_prime = np.asarray(X_prime.T[1])

figure()
plot(x_prime, y_prime, 'mo')
title('Reduce S by removing value s[0] = %2.5f' % s[0])

# 1-D: Min variation - Training along y-dim nearly perfect
ypt = np.asarray(y_prime.T)
lr.fit(ypt, truth[0])
lr.score(ypt, truth[0])
lr.predict(ypt)
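# ---------------------------------------------------------------
# For comparison, a sketch of the same kind of 1-D reduction using
# scikit-learn's PCA class, which centers the features and computes the
# principal components (via an SVD) for you. The data below only mirrors
# the Part 2 setup -- it is regenerated here, so exact numbers will differ.
from sklearn.decomposition import PCA

pca_x = np.random.normal(0, 4, N)
pca_y = np.append(3 * pca_x + np.random.normal(3, 1, N),
                  3 * pca_x + np.random.normal(-3, 1, N))
pca_samples = np.column_stack([np.append(pca_x, pca_x), pca_y])  # samples as rows
pca_labels = np.append(np.ones(N), np.zeros(N))

pca = PCA(n_components=2)
projected = pca.fit_transform(pca_samples)
pca.explained_variance_ratio_   # fraction of variance per component, i.e. s**2 / sum(s**2)

# Keeping only the max-variation component loses the class signal ...
lr.fit(projected[:, [0]], pca_labels)
lr.score(projected[:, [0]], pca_labels)   # near chance
# ... while the min-variation component separates the classes well.
lr.fit(projected[:, [1]], pca_labels)
lr.score(projected[:, [1]], pca_labels)   # near 1.0
# ---------------------------------------------------------------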