Skip to content

Instantly share code, notes, and snippets.

@bougui505
Created October 25, 2019 11:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bougui505/8befffc94c929e61f5bcd4cfaacceaee to your computer and use it in GitHub Desktop.
Save bougui505/8befffc94c929e61f5bcd4cfaacceaee to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: UTF8 -*-
# Author: Guillaume Bouvier -- guillaume.bouvier@pasteur.fr
# https://research.pasteur.fr/en/member/guillaume-bouvier/
# 2019-10-24 16:26:04 (UTC+0200)
import numpy
def get_cov(M, axis=1):
"""
Return the covariance matrix from M
>>> numpy.random.seed(0)
>>> n = 3*50
>>> s = 100
>>> M = numpy.random.uniform(size=(n, s))
>>> M.shape
(150, 100)
>>> get_cov(M, axis=1).shape
(150, 150)
>>> get_cov(M, axis=0).shape
(100, 100)
"""
n, s = M.shape
if axis == 1:
cov = (1./s)*M.dot(M.T)
if axis == 0:
cov = (1./s)*M.T.dot(M)
return cov
class PCA(object):
"""
PCA in the descriptor space
>>> numpy.random.seed(0)
>>> n = 3*2
>>> s = 10
>>> M = numpy.random.uniform(size=(n, s))
>>> pca = PCA(M)
>>> pca.M.shape
(6, 10)
>>> pca.V.shape
(6, 6)
>>> pca.w.shape
(6,)
>>> pca.v.shape
(6, 6)
Below are the first 4 eigenvalues:
>>> pca.w[:4]
array([1.76065779, 0.14085545, 0.05867094, 0.05254696])
... and the first 4 eigenvectors:
>>> pca.v[:, :4]
array([[-0.4749717 , 0.10193628, 0.26461765, 0.21559906],
[-0.44947704, -0.52536624, -0.33485093, -0.35988442],
[-0.4655914 , -0.30472463, -0.29266213, 0.47054322],
[-0.43379875, 0.02234178, 0.75167072, -0.20913282],
[-0.27113005, 0.58891008, -0.21645815, 0.43369448],
[-0.30643771, 0.52290341, -0.346898 , -0.6089022 ]])
Test to cut to ncomp number of component:
>>> (PCA(M, ncomp=4).v == pca.v[:, :4]).all()
True
>>> (PCA(M, ncomp=4).w == pca.w[:4]).all()
True
Try on axis = 0:
>>> pca_2 = PCA(M, axis=0)
The shape of the covariance matrix for each case:
>>> pca_2.V.shape, pca.V.shape
((10, 10), (6, 6))
And the corresponding eigenvalues:
>>> pca_2.w.shape, pca.w.shape
((6,), (6,))
However the eigeinvectors are equal:
>>> pca_2.v.shape, pca.v.shape
((6, 6), (6, 6))
>>> numpy.isclose(pca_2.v, pca.v).all()
True
"""
def __init__(self, M, ncomp=None, axis=1):
self.M = M
(self.n, self.s) = self.M.shape
if ncomp is None:
self.ncomp = self.n
else:
self.ncomp = ncomp
self.V = get_cov(M, axis=axis)
w, v = numpy.linalg.eigh(self.V)
sorter = w.argsort()[::-1]
self.w = w[sorter]
self.v = v[:, sorter]
self.w = self.w[:self.ncomp]
self.v = self.v[:, :self.ncomp]
if axis == 0:
self.w, self.v = self.switch_space()
def switch_space(self):
v = self.M.dot(self.v)/numpy.sqrt(float(self.s)*self.w)
v = v[:, :self.ncomp]
w = self.w[:self.ncomp]
return w, v
if __name__ == '__main__':
import doctest
doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment