Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Forked from alextp/sparse_pca.py
Created January 28, 2011 02:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ogrisel/799724 to your computer and use it in GitHub Desktop.
Save ogrisel/799724 to your computer and use it in GitHub Desktop.
import numpy as np, scipy, scipy.sparse, numpy.linalg, scipy.optimize
from scipy import weave
def project_l1(lbda, sigma):
"Project positive vector lbda to have l1 norm sigma"
ll = -np.sort(-lbda)
cs = 0.
theta = 0
prevtheta = 0
cs = np.cumsum(ll)
vtheta = (cs-sigma)/(np.arange(ll.shape[0])+1.)
i = np.argmin(ll-vtheta >= 0)-1
prevtheta = vtheta[i]
a = lbda - prevtheta
a *= a > 0
return a
def norm(dif):
"Using np.sum instead of np.dot to never ever have a 1-element array as result"
return np.sum(dif*dif)
def getrow(M, i):
return np.asarray(M.getrow(i).todense()).reshape(-1)
def setrow(M, i, v):
J = v.nonzero()[0]
Nj = len(J)
weave.inline("""
int j;
for (j = 0; j < Nj; ++j) {
double d = v(J(j));
int ii = i;
PyObject_CallMethod(M, "__setitem__", "((ll)d)", ii,j, d);
}""", ["J", "M", "v", "Nj", "i"],
type_converters=weave.converters.blitz)
def recerror(orig, t1, t2):
"The error of reconstructing orig as t1*t2"
err = 0.
for i in xrange(orig.shape[0]):
oi = getrow(orig, i)
t1i = getrow(t1, i)
err += norm(oi-t2*t1i)
return err
class MatModel(object):
def __init__(self, M, alpha, Ntopics, lrate, nsteps):
"""M is the sparse matrix we want to decompose,
alpha is the maximum l1 norm for each line of the factorized matrices
Ntopics is the number of latent components to use
lrate is the learning rate to use in the gradient descent
nsteps is how many gradient-and-project steps are used at each iteration"""
self.na, self.nb = M.shape
self.Ntopics = Ntopics
self.alpha = alpha
self.lrate = lrate
self.nsteps = nsteps
self.M = M
self.Tl = scipy.sparse.csr_matrix(np.random.random((self.na,self.Ntopics)))
self.Tr = scipy.sparse.csr_matrix(np.random.random((self.nb,self.Ntopics)))
def iterate(self):
print "Maximizing tl"
self.optimize_l(self.M, self.Tl, self.Tr)
print "error =", self.error()
print "Maximizing tr"
self.optimize_l(self.M.T, self.Tr, self.Tl)
print "error =", self.error()
def error(self):
return recerror(self.M, self.Tl, self.Tr)
def optimize_l(self, M, l1, l2):
"""Code to optimize either Tl or Tr.
The optimization is a gradient-descent step followed by an l1 projection step."""
nt = np.dot(l2.T,l2).toarray()
for i in xrange(M.shape[0]):
ai = getrow(M, i)
tai = getrow(l1, i)
for i in xrange(self.nsteps):
grad = (np.dot(nt,tai)-(l2.T*ai).reshape(-1))
tai -= self.lrate*grad/np.sqrt(norm(grad))
tai = project_l1(tai, self.alpha)
setrow(l1, i, tai)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment