Machine Learning
# extracted from: https://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
import numpy as np
from numpy import linalg as la
np.random.seed(42)
def flip_signs(A, B):
    """
    Utility function for resolving the sign ambiguity in SVD:
    the columns of U and V are only defined up to a simultaneous sign flip.
    http://stats.stackexchange.com/q/34396/115202
    """
    signs = np.sign(A) * np.sign(B)
    return A, B * signs
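# quick sanity check of flip_signs (added; not part of the original gist):
# flipping the sign of one column should be undone by flip_signs
M = np.array([[1.0, -2.0], [3.0, 4.0]])
M_flipped = M * np.array([1.0, -1.0])  # second column sign-flipped
assert np.allclose(*flip_signs(M, M_flipped))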
# Let the data matrix X be of n x p size,
# where n is the number of samples and p is the number of variables
n, p = 5, 3
X = np.random.rand(n, p)
# Center the data: PCA assumes the variables have zero mean
X -= np.mean(X, axis=0)
# the p x p covariance matrix
C = np.cov(X, rowvar=False)
print("C = \n", C)
# C is a real symmetric matrix, so it can be diagonalized
# (la.eigh is preferred over la.eig for symmetric matrices: it guarantees
# real eigenvalues and orthonormal eigenvectors)
l, principal_axes = la.eigh(C)
# sort eigenvalues (and the corresponding eigenvectors) in decreasing order
idx = l.argsort()[::-1]
l, principal_axes = l[idx], principal_axes[:, idx]
# the eigenvalues in decreasing order
print("l = \n", l)
# a matrix of eigenvectors (each column is an eigenvector)
print("V = \n", principal_axes)
# projections of X on the principal axes are called principal components
principal_components = X.dot(principal_axes)
print("Y = \n", principal_components)
# we now perform singular value decomposition of X
# "economy size" (or "thin") SVD
U, s, Vt = la.svd(X, full_matrices=False)
V = Vt.T
S = np.diag(s)
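# (added check) the thin SVD factors reconstruct X exactly
assert np.allclose(X, U.dot(S).dot(Vt))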
# 1) the columns of V are the principal directions/axes
#    (the numbering below follows the summary in the linked answer)
assert np.allclose(*flip_signs(V, principal_axes))
# 2) the columns of US are the principal components
assert np.allclose(*flip_signs(U.dot(S), principal_components))
# 3) the singular values are related to the eigenvalues of the covariance
#    matrix via l_i = s_i**2 / (n - 1)
assert np.allclose((s ** 2) / (n - 1), l)
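# (added) a common use of these eigenvalues: the fraction of total variance
# explained by each principal component
print("explained variance ratio = \n", l / l.sum())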
# 8) dimensionality reduction: to keep only k < p components, take the first
#    k columns of U and the upper-left k x k block of S
k = 2
PC_k = principal_components[:, 0:k]
US_k = U[:, 0:k].dot(S[0:k, 0:k])
assert np.allclose(*flip_signs(PC_k, US_k))
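# (added; point 9 of the linked answer, omitted from the original gist)
# multiplying the first k PCs by the first k principal axes gives the
# rank-k reconstruction X_k = U_k S_k V_k^T...
X_k = US_k.dot(Vt[0:k, :])
assert X_k.shape == X.shape
# ...and by the Eckart-Young theorem its squared Frobenius error equals
# the sum of the discarded squared singular values
assert np.allclose(la.norm(X - X_k, 'fro') ** 2, np.sum(s[k:] ** 2))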
# 10) we used "economy size" (or "thin") SVD, so the factors have these shapes
assert U.shape == (n, p)
assert S.shape == (p, p)
assert V.shape == (p, p)
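# (added; optional cross-check, assuming scikit-learn is available)
# sklearn's PCA is itself implemented via SVD, so it should agree with the above
from sklearn.decomposition import PCA
pca = PCA(n_components=k)
assert np.allclose(*flip_signs(pca.fit_transform(X), PC_k))
assert np.allclose(pca.explained_variance_, l[0:k])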