#!/usr/bin/python
# -*- coding: utf-8 -*-
# SEE http://blog.drskippy.net/ for writeup
# Demonstrate a simple problem of machine learning and dimension
# reduction with Principal Component Analysis (PCA). Use Singular Value
# Decomposition (SVD) to make this as understandable as possible.
#
# Scott Hendrickson 2012-05-11
#
# There are many good references on the mathematics behind SVD and PCA,
# as well as efficient algorithms for calculating the principal components
# without explicitly solving the SVD problem.
#
# See, for example:
# http://en.wikipedia.org/wiki/Singular_value_decomposition
# http://en.wikipedia.org/wiki/Principal_component_analysis
#
# Use the numpy, scipy and matplotlib libraries
from matplotlib.pyplot import *
from scipy import linalg
from numpy import matrix as mat, dot
import numpy as np
########################
# Demo Part 1
########################
# SVD: decompose a matrix X into the product of an orthogonal matrix W
# (if it helps, think: basis vectors), a diagonal matrix S (scaling) and
# an orthogonal matrix V_t (a rotation) such that X = W x S x V_t.
#
# In machine learning, X_t will be a matrix of samples (4 rows) and
# features (3 columns) for our learning examples; X stores the features
# as rows and the samples as columns.
X_t = mat([[1, 3, -10], [0, 2, 1], [-1, -3, 9], [0, -2, 0]])
X = X_t.T
# The scipy library makes this step easy
(W, s, V_t) = linalg.svd(X)
S = linalg.diagsvd(s, len(X), len(V_t))
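# Sanity check on the shapes: X is 3x4, so the full SVD gives W (3x3),
# S (3x4) and V_t (4x4).
W.shape, S.shape, V_t.shape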
recon = dot(dot(W, S), V_t)
# Are these equal (to within rounding)?
abs(X - recon)
# maximum error
np.max(abs(X - recon))
# One key to understanding and moving to PCA is
# understanding the diagonal matrix S
S
# Given that the features have zero-mean:
[np.mean(i) for i in X]
# s is an ordered vector that tells us the "significance" of each dimension
# in the rotated space. (Yes, I arranged the data that way. To be clear,
# you can do SVD on a matrix without zero-mean features, but the dimension
# reduction we are about to do requires it.)
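# If the features were not already zero-mean, we would center them first.
# A minimal sketch of that step (features as rows, as in X above):
X_centered = X - np.mean(X, axis=1)
[np.mean(i) for i in X_centered]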
#
# We can now selectively set lower values of s to
# zero to reduce dimension.
s_red = s.copy()
s_red[2] = 0
# Approximately reconstruct our original matrix, but with
# a reduced-dimension representation
S_red = linalg.diagsvd(s_red, len(X), len(V_t))
S_red
recon_red = dot(dot(W, S_red), V_t)
abs(X - recon_red)
# maximum error
np.max(abs(X - recon_red))
# ratio of errors
np.max(abs(X - recon)) / np.max(abs(X - recon_red))
# We "lost" roughly 14 orders of magnitude of reconstruction precision, but this
# turns out to be okay for some machine learning problems.
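# A rough check of why dropping s[2] is tolerable: the fraction of the total
# "energy" (sum of squared singular values, i.e. the squared Frobenius norm
# of X) retained by the first two dimensions.
np.sum(s[:2] ** 2) / np.sum(s ** 2)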
########################
# Demo Part 2
########################
from sklearn import linear_model
from copy import copy
# Classification problem where the points are linearly separable
# in 2 dimensions.
N = 200
x = np.random.normal(0, 4, N)
y1 = 3 * x + np.random.normal(3, 1, N)
y2 = 3 * x + np.random.normal(-3, 1, N)
y_avg = np.mean(np.append(y1, y2))
figure()
plot(x, y1 - y_avg, 'bo')
plot(x, y2 - y_avg, 'ro')
title('Original data sets (0-average)')
# features x and y are the rows of this array (one column per sample)
X = np.append([x, y1 - y_avg], [x, y2 - y_avg], 1)
X_t = X.T
# y1 group 1; y2 group 0
truth = np.append(np.ones([1, N]), np.zeros([1, N]), 1)
# 2d model works very well
lr = linear_model.LogisticRegression()
lr.fit(np.asarray(X_t), truth[0])
lr.score(np.asarray(X_t), truth[0])
lr.predict(np.asarray(X_t))
# Now try dimension reduction with SVD
(W, s, V_t) = linalg.svd(X)
s
# Both transformed representations agree before any trimming:
# X.T * W equals V_t.T * S.T (to within rounding)
S = linalg.diagsvd(s, len(X), len(V_t))
np.max(abs(X.T * np.matrix(W) - np.matrix(V_t).T * np.matrix(S).T))
# Now work with the transformed coordinates. It might not have been clear
# from above what the transformed coordinate system is. We can get there
# either by forming V_t.T * S.T or, equivalently, by projecting the
# samples onto W (X.T * W):
X_prime = np.matrix(V_t).T * np.matrix(S).T
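# The other route, shown for illustration: project the samples onto the
# left singular vectors (the columns of W); this matches X_prime up to rounding.
X_prime_alt = np.matrix(X).T * np.matrix(W)
np.max(abs(X_prime - X_prime_alt))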
x_prime = np.asarray(X_prime.T[0])
y_prime = np.asarray(X_prime.T[1])
figure()
plot(x_prime, y_prime, 'go')
title('Features after SVD Transformation')
# Linearly separable in 1-D? Try each new basis direction (the extremes of variation).
# Min variation - training along the y-dimension is nearly perfect
ypt = np.asarray(y_prime.T)
lr.fit(ypt, truth[0])
lr.score(ypt, truth[0])
lr.predict(ypt)
# Max variation - no useful separation here
xpt = np.asarray(x_prime.T)
lr.fit(xpt, truth[0])
lr.score(xpt, truth[0])
lr.predict(xpt)
########################
# Demo Part 3
########################
# Use the PCA idea to reduce to 1-D by keeping only the largest singular value
s_red = copy(s)
s_red[1] = 0
S_red = linalg.diagsvd(s_red, len(X), len(V_t))
X_prime = np.matrix(V_t).T * np.matrix(S_red).T
x_prime = np.asarray(X_prime.T[0])
y_prime = np.asarray(X_prime.T[1])
figure()
plot(x_prime, y_prime, 'yo')
title('Reduce S by removing s[1] = %2.5f' % s[1])
# Try both new basis directions (extremes of variation)
# 1-D: Max variation - training along the x-dimension performs poorly
xpt = np.asarray(x_prime.T)
lr.fit(xpt, truth[0])
lr.score(xpt, truth[0])
lr.predict(xpt)
# This is the opposite extreme to the "principal" components: keep only the
# direction of minimum variation
s_red = copy(s)
s_red[0] = 0
S_red = linalg.diagsvd(s_red, len(X), len(V_t))
X_prime = np.matrix(V_t).T * np.matrix(S_red).T
x_prime = np.asarray(X_prime.T[0])
y_prime = np.asarray(X_prime.T[1])
figure()
plot(x_prime, y_prime, 'mo')
title('Reduce S by removing value s[0] = %2.5f' % s[0])
# 1-D: Min variation - training along the y-dimension is nearly perfect
ypt = np.asarray(y_prime.T)
lr.fit(ypt, truth[0])
lr.score(ypt, truth[0])
lr.predict(ypt)
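# For comparison, a minimal sketch of the same 2-D -> 1-D reduction done with
# scikit-learn's PCA class (assumes sklearn.decomposition.PCA is available).
from sklearn.decomposition import PCA
# PCA expects samples as rows; X_t is samples x features.
pca = PCA(n_components=1)
X_pca = pca.fit_transform(np.asarray(X_t))
# PCA keeps the direction of maximum variation, which (as shown above) is the
# poor direction for this particular classification problem.
lr.fit(X_pca, truth[0])
lr.score(X_pca, truth[0])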