alextp/607259, created October 2, 2010
# coding: utf-8
import theano
import theano.tensor as T
import theano.sparse
import numpy as np
class Arow(object):
    """A class that implements a hacked-together version of AROW
    (Crammer et al., "Adaptive Regularization of Weight Vectors") using a
    diagonal approximation to the covariance matrix. It is hacky because
    I'm not solving the AROW equations exactly (iterating this to a fixed
    point should solve the mean equation, while the covariance equation
    is only crudely approximated), but an exact solution is hard to make
    general-purpose. At least it's fast and easily pluggable into pretty
    much any Theano-based SGD classifier."""
    def __init__(self, params, loss, lbda1, lbda2, inputs):
        self.params = params
        self.loss = loss
        # One diagonal "covariance" (really a confidence/precision) per parameter.
        self.sigma = [theano.shared(value=np.ones(p.get_value().shape))
                      for p in params]
        self.gl = [T.grad(cost=self.loss, wrt=p) for p in params]
        self.ups = {}
        for i in range(len(params)):
            # Scale each gradient step down by the accumulated confidence...
            self.ups[params[i]] = params[i] - lbda1 * self.gl[i] / self.sigma[i]
            # ...and grow the confidence by the squared gradient (a wild guess
            # at the covariance update; lbda2 is currently unused).
            self.ups[self.sigma[i]] = self.sigma[i] + self.gl[i] * self.gl[i]
        self.update = theano.function(inputs=[inputs], outputs=[],
                                      updates=self.ups)
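
# For reference (an added note, not part of the original gist): the exact
# AROW updates from Crammer et al. for an example x with label y in {-1, +1},
# mean mu, covariance Sigma, and regularizer r are
#
#     beta  = 1 / (x' Sigma x + r)
#     alpha = max(0, 1 - y * x' mu) * beta
#     mu    <- mu + alpha * y * Sigma x
#     Sigma <- Sigma - beta * (Sigma x)(Sigma x)'
#
# so with a diagonal Sigma the covariance update would be
# Sigma_ii <- Sigma_ii - beta * Sigma_ii^2 * x_i^2. The class above instead
# treats sigma as a precision that grows with the squared gradient.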
class ArowLR(object):
    """The logistic regression example from classifier.py, modified to use
    AROW updates."""
    def __init__(self, nfeats, l1, l2, lrate):
        self.W = theano.shared(value=np.random.random((nfeats, 1)) - 0.5,
                               name='W')
        self.x = theano.sparse.csr_matrix("x")
        # Standard sigmoid p(y=1|x) = 1/(1 + exp(-W.x)); the original omitted
        # the minus sign, which merely flips the sign convention of W.
        self.p_1_given_x = 1. / (1. + T.exp(-T.sum(theano.sparse.dot(self.x, self.W))))
        self.p_0_given_x = 1. - self.p_1_given_x
        self.pos_loss = -T.log(self.p_1_given_x)
        self.neg_loss = -T.log(self.p_0_given_x)
        self.regularizer = l1 * T.sum(abs(self.W)) + l2 * T.sum(T.dot(self.W.T, self.W))
        # Regularized gradients (computed here but not used by the AROW
        # updates below, which differentiate the unregularized losses).
        self.grad_pos = T.grad(cost=self.pos_loss + self.regularizer, wrt=self.W)
        self.grad_neg = T.grad(cost=self.neg_loss + self.regularizer, wrt=self.W)
        # One Arow instance (and hence one confidence vector) per label.
        self.pos_update = Arow([self.W], self.pos_loss, l1, l2, self.x).update
        self.neg_update = Arow([self.W], self.neg_loss, l1, l2, self.x).update
        self.apply = theano.function(inputs=[self.x],
                                     outputs=T.log(self.p_1_given_x))
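
# For intuition (an added note, not in the original): with s = sum(W.x) and
# p1 = sigmoid(s), T.grad yields
#
#     d(-log p1)/dW       = (p1 - 1) * x      (positive examples)
#     d(-log (1 - p1))/dW = p1 * x            (negative examples)
#
# Both gradients are proportional to the feature vector x, so the squared-
# gradient accumulator in Arow grows fastest for frequent, large features,
# shrinking their effective step size over time.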
class ArowLLearner(object):
    """Wrapper class for the AROW logistic regression classifier."""
    def __init__(self, nfeats):
        self.learner = ArowLR(nfeats, 0.1, 0.1, 2.1)
        self.apply = self.learner.apply
    def update(self, f, cls):
        # f is a sparse feature row (1 x nfeats); cls is the binary label.
        if cls:
            self.learner.pos_update(f)
        else:
            self.learner.neg_update(f)
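
# A minimal usage sketch (an assumption, not part of the original gist):
# train on a tiny two-feature toy problem using scipy.sparse rows, which is
# what theano.sparse.csr_matrix inputs accept.
if __name__ == "__main__":
    import scipy.sparse as sp

    learner = ArowLLearner(2)
    # Hypothetical toy data: feature 0 signals the positive class and
    # feature 1 the negative class.
    examples = [(sp.csr_matrix([[1.0, 0.0]]), True),
                (sp.csr_matrix([[0.0, 1.0]]), False)]
    for _ in range(20):
        for f, cls in examples:
            learner.update(f, cls)
    for f, cls in examples:
        print(cls, learner.apply(f))  # learner.apply returns log p(y=1 | x)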