Skip to content

Instantly share code, notes, and snippets.

@stormxuwz
Created November 18, 2015 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stormxuwz/d0dddd2b4d5b9c63d2ea to your computer and use it in GitHub Desktop.
Save stormxuwz/d0dddd2b4d5b9c63d2ea to your computer and use it in GitHub Desktop.
Some online learning algorithms: Perceptron (plain and margin variants), Winnow, and AdaGrad implementations sharing a common base class.
from __future__ import division
import numpy as np
class OnlineClf(object):
    """Base class for online (mistake-driven) linear classifiers.

    The weight vector ``self.w`` stores the bias term as its last entry.
    Subclasses must implement ``initializeWeights`` and ``weightsUpdate``.
    """

    def __init__(self, iterNum, R):
        self.w = None            # weights (last entry is the bias); set by fit()
        self.iterNum = iterNum   # number of passes over the training data
        self.misNum = [[0, 0]]   # [sample index, mistake flag]; made cumulative after fit
        self.accuracy = 0
        self.name = "online Clf"  # fixed typo: was "oneline Clf"
        self.R = R               # consecutive-correct threshold for early stop (0 = disabled)

    def fit(self, X, Y):
        """Train on the rows of X with labels Y (+1/-1), recording mistakes.

        A prediction with sign opposite to the label counts as a mistake.
        When ``self.R > 0``, training stops early once more than R consecutive
        samples are classified correctly.
        """
        self.w = self.initializeWeights(X.shape[1])
        consecutiveCorrect = 0
        convergent = False
        for k in range(self.iterNum):
            if self.R > 0 and convergent:
                break
            for i in range(X.shape[0]):
                sample_x = X[i, :].copy()
                sample_y = Y[i].copy()
                predScore = self.predict(sample_x)
                if sample_y * np.sign(predScore) < 0:
                    self.misNum.append([i + 1, 1])
                    consecutiveCorrect = 0
                else:
                    consecutiveCorrect += 1
                if self.R > 0 and consecutiveCorrect > self.R:
                    print("Convergent!")  # py3 print (was a py2 print statement)
                    convergent = True
                    break
                self.w = self.weightsUpdate(predScore, sample_x, sample_y)
        if (convergent is False) and (self.R > 0):
            print("not convergent!")
        # Convert per-mistake flags into a running cumulative mistake count.
        self.misNum = np.array(self.misNum)
        self.misNum[:, 1] = np.cumsum(self.misNum[:, 1])

    def predict(self, X, score=True):
        """Return raw scores (score=True) or their signs; bias is w[-1].

        Note np.sign maps a zero score to 0, so a boundary score matches
        neither +1 nor -1 labels.
        """
        wx = self.w[-1] + np.dot(X, self.w[:-1]).flatten()
        if score:
            return wx
        else:
            return np.sign(wx)  # Treat boundary value as negative label

    def weightsUpdate(self, i, x, y):
        '''
        Update rule; overridden by subclasses.
        x: a single sample
        y: truth label for x
        w: original weights
        '''
        return 0

    def initializeWeights(self, n):
        '''
        Initialize the weights; overridden by subclasses.
        n: the number of features in the data
        '''
        pass

    def evaluation(self, testX, testY):
        """Return accuracy statistics on a test set, plus total training mistakes."""
        pred = self.predict(testX, score=False)
        accuracy = np.sum(pred == testY) / len(testY)
        return {"accuracy": accuracy,
                "misNum": self.misNum[-1, 1],
                "accuracyNum": np.sum(pred == testY),
                "error": 1 - accuracy}
class Perceptron(OnlineClf):
    """Classic Perceptron, optionally with a positive classification margin."""

    def __init__(self, dataIter=20, eta=1, margin=0, R=0):
        super(Perceptron, self).__init__(dataIter, R)
        self.eta = eta        # learning rate
        self.margin = margin  # score margin required to skip an update
        self.name = "Perceptron with margin" if self.margin > 0 else "Perceptron"

    def __str__(self):
        params = {"name": self.name, "eta": self.eta,
                  "margin": self.margin, "R": self.R}
        return str(params)

    def weightsUpdate(self, predScore, x, y):
        """Additive update when the signed score fails to clear the margin."""
        updated = self.w.copy()
        if y * predScore <= self.margin:
            updated[:-1] += self.eta * y * x
            updated[-1] += self.eta * y
        return updated

    def initializeWeights(self, n):
        """All-zero weights, plus one extra zero slot for the bias."""
        return np.zeros(n + 1)
class Perceptron2(OnlineClf):
    """Perceptron variant with a class-dependent margin (scaled by beta for positives)."""

    def __init__(self, dataIter=20, eta=1, margin=0, R=0, beta=2):
        super(Perceptron2, self).__init__(dataIter, R)
        self.eta = eta        # learning rate
        self.margin = margin  # base score margin
        self.beta = beta      # margin multiplier applied to positive examples
        # The original if/else assigned the identical string in both
        # branches, so the conditional was redundant.
        self.name = "Modified Perceptron"

    def __str__(self):
        return str({"name": self.name, "eta": self.eta, "margin": self.margin,
                    "R": self.R, "beta": self.beta})

    def weightsUpdate(self, predScore, x, y):
        """Additive update unless the score clears the class-dependent margin.

        Positive examples must clear beta*margin; negative examples only margin.
        """
        w = self.w.copy()
        if y * predScore > self.beta * self.margin and y > 0:
            return w
        elif y * predScore > self.margin and y < 0:
            return w
        else:
            w[:-1] = w[:-1] + self.eta * y * x
            w[-1] = w[-1] + self.eta * y
            return w

    def initializeWeights(self, n):
        """All-zero weights and bias; n is the feature count."""
        return np.zeros(n + 1)
class Winnow(OnlineClf):
    """Winnow classifier with multiplicative updates, optionally with a margin."""

    def __init__(self, dataIter=20, alpha=1.1, margin=0, R=0):
        super(Winnow, self).__init__(dataIter, R)
        self.alpha = alpha    # multiplicative promotion/demotion factor
        self.margin = margin  # score margin required to skip an update
        self.name = "Winnow with margin" if self.margin > 0 else "Winnow"

    def __str__(self):
        params = {"name": self.name, "alpha": self.alpha,
                  "margin": self.margin, "R": self.R}
        return str(params)

    def initializeWeights(self, n):
        """Weights start at 1; the bias entry is fixed at -n (threshold of n)."""
        weights = np.ones(n + 1)
        weights[-1] = -n
        return weights

    def weightsUpdate(self, predScore, x, y):
        """Scale weights by alpha**(y*x) on a margin violation; bias never changes."""
        weights = self.w.copy()
        if y * predScore <= self.margin:
            weights[:-1] = weights[:-1] * self.alpha ** (y * x)
        return weights
class AdaGrad(OnlineClf):
    """Hinge-loss updates with AdaGrad per-coordinate step sizes."""

    def __init__(self, dataIter=20, eta=1.5, R=0):
        super(AdaGrad, self).__init__(dataIter, R)
        self.eta = eta  # base learning rate
        self.name = "AdaGrad"
        self.Gt = None  # running sum of squared gradients, one entry per weight

    def __str__(self):
        return str({"name": self.name, "eta": self.eta, "R": self.R})

    def weightsUpdate(self, predScore, x, y):
        """No-op when the hinge margin (1) is satisfied; otherwise a scaled step."""
        w = self.w.copy()
        if y * predScore > 1:
            return w
        # Subgradient of the hinge loss for this sample (bias in last slot).
        grad = self.w.copy()
        grad[:-1] = -y * x
        grad[-1] = -y
        self.Gt = self.Gt + grad ** 2
        # Step only along coordinates with accumulated gradient mass,
        # which also avoids division by zero.
        nz, = np.where(self.Gt > 0)
        w[nz] -= self.eta * grad[nz] / np.sqrt(self.Gt[nz])
        return w

    def initializeWeights(self, n):
        """Zero weights/bias and reset the squared-gradient accumulator."""
        self.Gt = np.zeros(n + 1)
        return np.zeros(n + 1)
if __name__ == '__main__':
    # Smoke-test the classifiers on data loaded via the project-local
    # comparison helper.
    import comparison

    handler = comparison.OnlineClfComparison()
    handler.readData('data_1_500.mat')
    X = handler.trainX[:5000, :]   # expected shape (5000, 500) per original notes
    Y = handler.trainY[:5000]      # expected shape (5000,)
    # NOTE(review): test split is drawn from the tail of trainX — it may
    # overlap the training rows depending on the data size; confirm.
    testX = handler.trainX[-5000:, :]
    testY = handler.trainY[-5000:]
    clfList = [Perceptron, Winnow, AdaGrad]
    for clf in clfList:
        myClf = clf()
        myClf.fit(X, Y)
        # One line per classifier: params then metrics (py3 print() replaces
        # the py2 `print myClf,` trailing-comma statement).
        print(myClf, myClf.evaluation(testX, testY))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment