alextp/607259, created October 2, 2010
# coding: utf-8
import theano
import theano.tensor as T
import theano.sparse
import numpy as np
class Arow(object):
    """A class that implements a hacked-together version of AROW
    (Crammer et al., "Adaptive Regularization of Weight Vectors") using a
    diagonal approximation to the covariance matrix. It is hacky because
    I'm not solving the AROW equations exactly (iterating this to a fixed
    point should solve the mean equation, while the covariance equation
    is only crudely approximated), but an exact solution is hard to make
    general-purpose. At least it's fast and easily pluggable into pretty
    much any Theano-based SGD classifier."""
    def __init__(self, params, loss, lbda1, lbda2, inputs):
        self.params = params
        self.loss = loss
        # One diagonal "covariance" (really a confidence/precision) per parameter.
        self.sigma = [theano.shared(value=np.ones(p.get_value().shape))
                      for p in params]
        self.gl = [T.grad(cost=self.loss, wrt=p) for p in params]
        self.ups = {}
        for i in range(len(params)):
            # Scale each gradient step down by the accumulated confidence...
            self.ups[params[i]] = params[i] - lbda1 * self.gl[i] / self.sigma[i]
            # ...and grow the confidence by the squared gradient (a wild guess
            # at the covariance update; lbda2 is currently unused).
            self.ups[self.sigma[i]] = self.sigma[i] + self.gl[i] * self.gl[i]
        self.update = theano.function(inputs=[inputs], outputs=[],
                                      updates=self.ups)
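
# For reference (an added note, not part of the original gist): the exact
# AROW updates from Crammer et al. for an example x with label y in {-1, +1},
# mean mu, covariance Sigma, and regularizer r are
#
#     beta  = 1 / (x' Sigma x + r)
#     alpha = max(0, 1 - y * x' mu) * beta
#     mu    <- mu + alpha * y * Sigma x
#     Sigma <- Sigma - beta * (Sigma x)(Sigma x)'
#
# so with a diagonal Sigma the covariance update would be
# Sigma_ii <- Sigma_ii - beta * Sigma_ii^2 * x_i^2. The class above instead
# treats sigma as a precision that grows with the squared gradient.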
class ArowLR(object):
    """The logistic regression example from classifier.py, modified to use
    AROW updates."""
    def __init__(self, nfeats, l1, l2, lrate):
        self.W = theano.shared(value=np.random.random((nfeats, 1)) - 0.5,
                               name='W')
        self.x = theano.sparse.csr_matrix("x")
        # Standard sigmoid p(y=1|x) = 1/(1 + exp(-W.x)); the original omitted
        # the minus sign, which merely flips the sign convention of W.
        self.p_1_given_x = 1. / (1. + T.exp(-T.sum(theano.sparse.dot(self.x, self.W))))
        self.p_0_given_x = 1. - self.p_1_given_x
        self.pos_loss = -T.log(self.p_1_given_x)
        self.neg_loss = -T.log(self.p_0_given_x)
        self.regularizer = l1 * T.sum(abs(self.W)) + l2 * T.sum(T.dot(self.W.T, self.W))
        # Regularized gradients (computed here but not used by the AROW
        # updates below, which differentiate the unregularized losses).
        self.grad_pos = T.grad(cost=self.pos_loss + self.regularizer, wrt=self.W)
        self.grad_neg = T.grad(cost=self.neg_loss + self.regularizer, wrt=self.W)
        # One Arow instance (and hence one confidence vector) per label.
        self.pos_update = Arow([self.W], self.pos_loss, l1, l2, self.x).update
        self.neg_update = Arow([self.W], self.neg_loss, l1, l2, self.x).update
        self.apply = theano.function(inputs=[self.x],
                                     outputs=T.log(self.p_1_given_x))
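
# For intuition (an added note, not in the original): with s = sum(W.x) and
# p1 = sigmoid(s), T.grad yields
#
#     d(-log p1)/dW       = (p1 - 1) * x      (positive examples)
#     d(-log (1 - p1))/dW = p1 * x            (negative examples)
#
# Both gradients are proportional to the feature vector x, so the squared-
# gradient accumulator in Arow grows fastest for frequent, large features,
# shrinking their effective step size over time.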
class ArowLLearner(object):
    """Wrapper class for the AROW logistic regression classifier."""
    def __init__(self, nfeats):
        self.learner = ArowLR(nfeats, 0.1, 0.1, 2.1)
        self.apply = self.learner.apply
    def update(self, f, cls):
        # f is a sparse feature row (1 x nfeats); cls is the binary label.
        if cls:
            self.learner.pos_update(f)
        else:
            self.learner.neg_update(f)
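
# A minimal usage sketch (an assumption, not part of the original gist):
# train on a tiny two-feature toy problem using scipy.sparse rows, which is
# what theano.sparse.csr_matrix inputs accept.
if __name__ == "__main__":
    import scipy.sparse as sp

    learner = ArowLLearner(2)
    # Hypothetical toy data: feature 0 signals the positive class and
    # feature 1 the negative class.
    examples = [(sp.csr_matrix([[1.0, 0.0]]), True),
                (sp.csr_matrix([[0.0, 1.0]]), False)]
    for _ in range(20):
        for f, cls in examples:
            learner.update(f, cls)
    for f, cls in examples:
        print(cls, learner.apply(f))  # learner.apply returns log p(y=1 | x)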