# wbaek/multiclass_classifier.py

## multiclass_classifier.py
import math
import numpy as np

class Softmax:
    """Softmax output activation paired with a summed cross-entropy loss.

    Arrays are laid out as ``(classes, samples)``: axis 0 indexes the classes
    and every column is one sample. A 1-D array is a single sample.
    """

    def forward(self, x):
        """Return class probabilities, normalised independently per sample.

        Args:
            x: array of logits, ``(classes,)`` or ``(classes, samples)``.

        Returns:
            Array shaped like ``x`` whose entries along axis 0 sum to 1.
        """
        x = np.asarray(x)
        if x.size == 0:
            # Nothing to normalise; keep the (empty) shape.
            return np.exp(x)
        # Normalise along the class axis only, so each sample's probabilities
        # sum to 1 (a 0-d input has no axis and is reduced as a whole).
        axis = 0 if x.ndim > 0 else None
        # Subtracting the per-sample maximum leaves the result unchanged but
        # keeps np.exp from overflowing on large logits.
        shifted = x - x.max(axis=axis, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=axis, keepdims=True)

    def backward(self, y, target):
        """Return ``y - target``, the loss gradient with respect to the logits."""
        return y - target

    def loss(self, y, target):
        """Return the cross-entropy ``-sum(target * log(y))`` over all entries.

        Probabilities are floored at the smallest positive normal float so a
        value that underflowed to 0 produces a large finite log instead of
        ``-inf`` (and ``-inf * 0 == nan`` for the non-target classes).
        """
        floor = np.finfo(float).tiny
        return - np.sum(np.log(np.maximum(y, floor)) * target)

class ReLu:
    """Rectified linear unit and its derivative, exposed as static helpers."""

    @staticmethod
    def function(x):
        """Return the element-wise maximum of ``x`` and 0."""
        # np.fmax (rather than np.maximum) ignores NaN operands, so a NaN
        # input maps to 0 instead of propagating.
        return np.fmax(x, np.zeros_like(x))

    @staticmethod
    def derivative(x):
        """Return 1 where ``x > 0`` and 0 elsewhere.

        Args:
            x: array-like of any rank.

        Returns:
            Integer array with the same shape as ``x``.
        """
        # One vectorised comparison replaces the nested Python loops and is
        # not restricted to 2-D input.
        return (np.asarray(x) > 0).astype(int)

class GradientDescent:
    """Plain gradient-descent rule with a fixed learning rate."""

    def __init__(self, learning_rate=0.01):
        """Remember the step size that scales every gradient."""
        self.learning_rate = learning_rate

    def update(self, param, gradient):
        """Move ``param`` against ``gradient`` and return it.

        The subtraction uses augmented assignment, so a NumPy array passed as
        ``param`` is modified in place and the same object is returned.
        """
        step = self.learning_rate * gradient
        param -= step
        return param

class Layer:
    """Fully connected layer computing ``f(W . x + b)`` on column-sample batches.

    Attributes:
        W: weight matrix of shape ``(output_size, input_size)``.
        b: bias column of shape ``(output_size, 1)``.
        params: ``[W, b]``, refreshed after every ``update``.
    """

    def __init__(self, input_size, output_size,
            nonlinear_function=lambda x:x, derivative_function=lambda x:np.ones(x.shape),
            updater=None):
        """Create the layer with random weights and zero biases.

        Args:
            input_size: number of input rows.
            output_size: number of output rows.
            nonlinear_function: activation applied to ``W . x + b``
                (identity by default).
            derivative_function: derivative of the activation, evaluated at
                the pre-activation (all ones by default).
            updater: object with ``update(param, gradient)``; a fresh
                ``GradientDescent()`` is created when omitted.
        """
        self.input_size = input_size
        self.output_size = output_size
        # Uniform initialisation in [-limit, limit], limit = sqrt(6 / (fan_in + fan_out)).
        limit = math.sqrt(6. / (output_size + input_size))
        self.W = limit * np.random.uniform(-1.0, 1.0, (output_size, input_size))
        self.b = np.zeros((output_size, 1))
        self.params = [self.W, self.b]
        self.nonlinear_function = nonlinear_function
        self.derivative_function = derivative_function
        # Built per instance: a default evaluated in the signature would be a
        # single updater object shared by every layer.
        self.updater = GradientDescent() if updater is None else updater

    def forward(self, x):
        """Cache the input and pre-activation, then return the activated output."""
        self.x = x
        self.a = np.dot(self.W, x) + self.b
        return self.nonlinear_function(self.a)

    def backward(self, delta):
        """Store the pre-activation delta and return the delta for the layer input.

        Must be called after ``forward`` (it reads the cached pre-activation).
        """
        self.delta_a = delta * self.derivative_function(self.a)
        return np.dot(self.W.T, self.delta_a)

    def get_gradient(self):
        """Return ``(dW, db)`` from the cached input and delta, summed over samples."""
        grad_w = np.dot(self.delta_a, self.x.T)
        # Summing over the sample axis equals multiplying by a column of ones.
        grad_b = self.delta_a.sum(axis=1, keepdims=True)
        return (grad_w, grad_b)

    def update(self):
        """Apply the updater to ``W`` and ``b`` and keep ``params`` in sync.

        The updater's return value is stored back, so updaters that return a
        new array work as well as ones that modify the parameter in place.
        """
        grad_w, grad_b = self.get_gradient()
        self.W = self.updater.update(self.W, grad_w)
        self.b = self.updater.update(self.b, grad_b)
        self.params = [self.W, self.b]

class Network:
    """Sequential stack of layers followed by an output activation."""

    def __init__(self):
        """Start with an empty layer list and a ``Softmax`` output activation."""
        self.activation = Softmax()
        self.layers = []

    def predict(self, x):
        """Feed ``x`` through every layer in order and apply the output activation."""
        signal = x
        for layer in self.layers:
            signal = layer.forward(signal)
        return self.activation.forward(signal)

    def train(self, x, target):
        """Run one forward/backward pass, update every layer, and return the loss.

        Layers are visited last to first; each one back-propagates the delta
        and is then updated before the delta moves on to the previous layer.
        The returned loss is computed from the prediction made before the update.
        """
        output = self.predict(x)
        delta = self.activation.backward(output, target)
        for layer in reversed(self.layers):
            delta = layer.backward(delta)
            layer.update()
        return self.activation.loss(output, target)

if __name__ == "__main__":
    # Toy two-class problem: each column is one sample, the rows of `x` are
    # the two input features and the rows of `target` are the one-hot classes.
    # The arrays never change, so they are built once rather than every epoch.
    x = np.array([ [1, 2, 1, 2,  5, 6, 5, 6], [5, 4, 4, 5,  1, 2, 2, 1]])
    target = np.array([ [1, 1, 1, 1,  0, 0, 0, 0], [0, 0, 0, 0,  1, 1, 1, 1]])

    # 2 inputs -> 10 ReLU units -> 2 linear outputs, softmax on top.
    n = Network()
    n.layers.append( Layer(2, 10, ReLu.function, ReLu.derivative, updater=GradientDescent(learning_rate=0.01)) )
    n.layers.append( Layer(10, 2, updater=GradientDescent(learning_rate=0.01)) )
    n.activation = Softmax()

    for epoch in range(0, 20):
        loss = n.train( x = x, target = target )
        if epoch%5 == 0:
            # A single parenthesised argument prints the same text on
            # Python 2 and Python 3; the bare print statement is 2-only.
            print('epoch:%04d loss:%.2f'%(epoch, loss))
	import math
	import numpy as np

	class Softmax:
	    """Softmax output activation paired with a summed cross-entropy loss.

	    Arrays are laid out as ``(classes, samples)``: axis 0 indexes the classes
	    and every column is one sample. A 1-D array is a single sample.
	    """

	    def forward(self, x):
	        """Return class probabilities, normalised independently per sample."""
	        x = np.asarray(x)
	        if x.size == 0:
	            # Nothing to normalise; keep the (empty) shape.
	            return np.exp(x)
	        # Normalise along the class axis only, so each sample sums to 1.
	        axis = 0 if x.ndim > 0 else None
	        # Subtracting the per-sample maximum keeps np.exp from overflowing.
	        shifted = x - x.max(axis=axis, keepdims=True)
	        exps = np.exp(shifted)
	        return exps / exps.sum(axis=axis, keepdims=True)

	    def backward(self, y, target):
	        """Return ``y - target``, the loss gradient with respect to the logits."""
	        return y - target

	    def loss(self, y, target):
	        """Return the cross-entropy ``-sum(target * log(y))`` over all entries."""
	        # Floor the probabilities so an underflowed 0 cannot yield -inf * 0 = nan.
	        floor = np.finfo(float).tiny
	        return - np.sum(np.log(np.maximum(y, floor)) * target)

	class ReLu:
	    """Rectified linear unit and its derivative, exposed as static helpers."""

	    @staticmethod
	    def function(x):
	        """Return the element-wise maximum of ``x`` and 0."""
	        # np.fmax ignores NaN operands, so a NaN input maps to 0.
	        return np.fmax(x, np.zeros_like(x))

	    @staticmethod
	    def derivative(x):
	        """Return an integer array that is 1 where ``x > 0`` and 0 elsewhere."""
	        # One vectorised comparison; works for input of any rank.
	        return (np.asarray(x) > 0).astype(int)

	class GradientDescent:
	    """Plain gradient-descent rule with a fixed learning rate."""

	    def __init__(self, learning_rate=0.01):
	        """Remember the step size that scales every gradient."""
	        self.learning_rate = learning_rate

	    def update(self, param, gradient):
	        """Subtract ``learning_rate * gradient`` from ``param`` and return it.

	        Augmented assignment is used, so a NumPy array passed as ``param``
	        is modified in place and the same object is returned.
	        """
	        param -= self.learning_rate * gradient
	        return param

	class Layer:
	    """Fully connected layer computing ``f(W . x + b)`` on column-sample batches.

	    ``W`` has shape ``(output_size, input_size)`` and ``b`` has shape
	    ``(output_size, 1)``; ``params`` is ``[W, b]``, refreshed after ``update``.
	    """

	    def __init__(self, input_size, output_size,
	            nonlinear_function=lambda x:x, derivative_function=lambda x:np.ones(x.shape),
	            updater=None):
	        """Create the layer with random weights and zero biases.

	        ``updater`` is any object with ``update(param, gradient)``; a fresh
	        ``GradientDescent()`` is created when it is omitted.
	        """
	        self.input_size = input_size
	        self.output_size = output_size
	        # Uniform initialisation in [-limit, limit], limit = sqrt(6 / (fan_in + fan_out)).
	        limit = math.sqrt(6. / (output_size + input_size))
	        self.W = limit * np.random.uniform(-1.0, 1.0, (output_size, input_size))
	        self.b = np.zeros((output_size, 1))
	        self.params = [self.W, self.b]
	        self.nonlinear_function = nonlinear_function
	        self.derivative_function = derivative_function
	        # Built per instance so layers never share one default updater object.
	        self.updater = GradientDescent() if updater is None else updater

	    def forward(self, x):
	        """Cache the input and pre-activation, then return the activated output."""
	        self.x = x
	        self.a = np.dot(self.W, x) + self.b
	        return self.nonlinear_function(self.a)

	    def backward(self, delta):
	        """Store the pre-activation delta and return the delta for the layer input."""
	        self.delta_a = delta * self.derivative_function(self.a)
	        return np.dot(self.W.T, self.delta_a)

	    def get_gradient(self):
	        """Return ``(dW, db)`` from the cached input and delta, summed over samples."""
	        grad_w = np.dot(self.delta_a, self.x.T)
	        grad_b = self.delta_a.sum(axis=1, keepdims=True)
	        return (grad_w, grad_b)

	    def update(self):
	        """Apply the updater to ``W`` and ``b``, storing whatever it returns."""
	        grad_w, grad_b = self.get_gradient()
	        self.W = self.updater.update(self.W, grad_w)
	        self.b = self.updater.update(self.b, grad_b)
	        self.params = [self.W, self.b]

	class Network:
	    """Sequential stack of layers followed by an output activation."""

	    def __init__(self):
	        """Start with an empty layer list and a ``Softmax`` output activation."""
	        self.layers = []
	        self.activation = Softmax()

	    def predict(self, x):
	        """Feed ``x`` through every layer in order and apply the output activation."""
	        signal = x
	        for layer in self.layers:
	            signal = layer.forward(signal)
	        return self.activation.forward(signal)

	    def train(self, x, target):
	        """Run one forward/backward pass, update every layer, and return the loss.

	        Layers are visited last to first; each one back-propagates the delta
	        and is then updated before the delta moves to the previous layer.
	        """
	        output = self.predict(x)
	        delta = self.activation.backward(output, target)
	        for layer in reversed(self.layers):
	            delta = layer.backward(delta)
	            layer.update()
	        return self.activation.loss(output, target)

	if __name__ == "__main__":
	    # Toy two-class problem: each column is one sample, the rows of `x` are
	    # the two input features and the rows of `target` are the one-hot classes.
	    x = np.array([ [1, 2, 1, 2, 5, 6, 5, 6], [5, 4, 4, 5, 1, 2, 2, 1]])
	    target = np.array([ [1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1]])

	    # 2 inputs -> 10 ReLU units -> 2 linear outputs, softmax on top.
	    n = Network()
	    n.layers.append( Layer(2, 10, ReLu.function, ReLu.derivative, updater=GradientDescent(learning_rate=0.01)) )
	    n.layers.append( Layer(10, 2, updater=GradientDescent(learning_rate=0.01)) )
	    n.activation = Softmax()

	    for epoch in range(0, 20):
	        loss = n.train( x = x, target = target )
	        if epoch%5 == 0:
	            # A single parenthesised argument prints the same text on
	            # Python 2 and Python 3.
	            print('epoch:%04d loss:%.2f'%(epoch, loss))