MLP using only NumPy
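The script below implements a small multilayer perceptron from scratch in NumPy: a fully connected layer, ReLU/sigmoid/softmax activations with hand-written backward passes, binary and categorical cross-entropy losses, and an SGD-with-momentum optimizer. As a smoke test, a 5-10-5 network is trained to map the 5x5 identity matrix onto its row-reversed counterpart, i.e. to learn a fixed permutation of five one-hot classes.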
import numpy as np


class Optimizer:
    # SGD with momentum, kept as an exponential moving average of the gradients.
    def __init__(self, mu=0.9, lr=0.01):
        self.cache = {}  # per-parameter running average of gradients
        self.mu = mu     # momentum coefficient
        self.lr = lr     # learning rate

    def update(self, name_w, old_w, dw):
        # v <- mu * v + (1 - mu) * dw (the first gradient seeds the average)
        if name_w in self.cache:
            self.cache[name_w] = self.cache[name_w] * self.mu + dw * (1 - self.mu)
        else:
            self.cache[name_w] = dw
        return old_w - self.lr * self.cache[name_w]

class Layer:
    COUNTER = 0  # gives each layer a unique id so the optimizer can key its cache

    def __init__(self):
        self.id = Layer.COUNTER
        Layer.COUNTER += 1
        self.cache = {}

    def backward(self, dout):
        # Default: identity layer with no trainable weights.
        return dout, {}

    def update(self, dout):
        # Backpropagate through the layer, then let the module-level optimizer
        # update each trainable weight returned by backward().
        din, weights = self.backward(dout)
        for k, dw in weights.items():
            old_value = getattr(self, k)
            new_value = optimizer.update(str(self.id) + "_" + k, old_value, dw)
            setattr(self, k, new_value)
        return din

class FC(Layer):
    def __init__(self, input_size, output_size, add_bias=True):
        super().__init__()
        self.w = np.random.normal(loc=0.0, scale=1 / np.sqrt(2), size=(input_size, output_size))
        self.w /= input_size
        if add_bias:
            self.b = np.random.normal(loc=0.0, scale=1 / np.sqrt(2), size=(output_size,))
            self.b /= output_size
        else:
            self.b = None

    def __call__(self, x):
        # W: (Batch x Input) x (Input x Output) = Batch x Output
        # B: (Batch x Output) + Output (broadcast)
        self.cache = {
            'x': x
        }
        if self.b is not None:
            return (x @ self.w) + self.b
        return x @ self.w

    def backward(self, dout):
        # dout: Batch x Output
        batch_size = dout.shape[0]
        # dF/din: (Batch x Output) x (Output x Input) = Batch x Input
        dF_din = dout @ self.w.T
        # dF/dW: (Input x Batch) x (Batch x Output) = Input x Output
        dF_dW = (self.cache['x'].T @ dout) / batch_size
        # dF/dB: (1 x Batch) x (Batch x Output) = 1 x Output
        # dF_dB = np.ones((1, batch_size)).dot(dout).flatten()
        dF_dB = np.mean(dout, axis=0)
        return dF_din, {'w': dF_dW, 'b': dF_dB}

class ReLU(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        mask = x > 0
        self.cache['mask'] = mask
        return mask * x

    def backward(self, dout):
        # dF/din: (Batch x Input) o (Batch x Input) = Batch x Input (elementwise)
        return dout * self.cache['mask'], {}


class Sigmoid(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        output = 1. / (1. + np.exp(-x))
        self.cache['sigmoid'] = output
        return output

    def backward(self, dout):
        # dF/din: (Batch x Input) o (Batch x Input) = Batch x Input (elementwise)
        return dout * (self.cache['sigmoid'] * (1 - self.cache['sigmoid'])), {}

class Softmax(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        # Subtract the per-row maximum for numerical stability.
        max_v = np.max(x, axis=1, keepdims=True)
        value_exp = np.exp(x - max_v)
        softmax_value = value_exp / (np.sum(value_exp, axis=1, keepdims=True) + 1e-10)
        self.cache['softmax'] = softmax_value
        return softmax_value

    def backward(self, dout):
        # dout: Batch x Output
        # dF/din: (Batch x Output) x (Output x Input) = Batch x Input
        # Build the per-sample Jacobian: s_i*(1 - s_i) on the diagonal, -s_i*s_j off it.
        softmax_value = self.cache['softmax']
        outputs = []
        for i in range(softmax_value.shape[0]):
            softmax_value_single = softmax_value[i]
            sisi = softmax_value_single * (1 - softmax_value_single)
            sisj = -softmax_value_single.reshape(-1, 1) @ softmax_value_single.reshape(1, -1)
            din_single = np.empty((dout.shape[1], dout.shape[1]))
            mask = np.eye(dout.shape[1], dtype=bool)  # np.bool is removed in recent NumPy
            din_single[mask] = sisi
            din_single[~mask] = sisj[~mask]
            outputs.append(dout[i].reshape(1, -1) @ din_single)
        return np.concatenate(outputs, axis=0), {}
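
# Added note (not in the original gist): the per-sample Jacobian assembled above is
#   d s_i / d x_j = s_i * (delta_ij - s_j),
# i.e. sisi = s_i * (1 - s_i) on the diagonal and sisj = -s_i * s_j off the diagonal;
# backward() then left-multiplies it by the incoming gradient row dout[i].
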
class BinaryCrossEntropy:
    def __init__(self):
        super().__init__()

    def __call__(self, x, y):
        self.cache = {
            'x': x,
            'y': y
        }
        return np.mean(-y * np.log(x + 1e-10) - (1 - y) * np.log(1 - x + 1e-10))

    def backward(self):
        return -(self.cache['y'] / (self.cache['x'] + 1e-10)) + ((1 - self.cache['y']) / (1 - self.cache['x'] + 1e-10))


class CrossEntropy:
    def __init__(self):
        super().__init__()

    def __call__(self, x, y):
        self.cache = {
            'x': x,
            'y': y
        }
        return np.mean(-y * np.log(x + 1e-10))

    def backward(self):
        return -self.cache['y'] / (self.cache['x'] + 1e-10)


class MultiClassAccuracy:
    def __init__(self):
        pass

    def __call__(self, x, y):
        return np.mean(np.argmax(x, axis=-1) == np.argmax(y, axis=-1))

class MyNet:
    def __init__(self):
        self.fc1 = FC(5, 10)
        self.fc1_relu = ReLU()
        self.fc2 = FC(10, 5)
        self.fc2_softmax = Softmax()

    def __call__(self, x):
        x = self.fc1(x)
        x = self.fc1_relu(x)
        x = self.fc2(x)
        x = self.fc2_softmax(x)
        return x

    def update(self, dout):
        # Backpropagate in reverse order of the forward pass.
        dout = self.fc2_softmax.update(dout)
        dout = self.fc2.update(dout)
        dout = self.fc1_relu.update(dout)
        dout = self.fc1.update(dout)
        return dout

optimizer = Optimizer()
model = MyNet()
loss = CrossEntropy()  # or BinaryCrossEntropy()
accuracy = MultiClassAccuracy()

# Toy task: map the 5x5 identity matrix onto its row-reversed counterpart.
x_ = np.eye(5)
y_ = np.eye(5)[::-1, :]
print(x_)
print(y_)

for i in range(15000):
    output = model(x_)
    loss_value = loss(output, y_)
    accuracy_value = accuracy(output, y_)
    model.update(loss.backward())
    if i % 100 == 0:
        print('Loss:', round(loss_value, 2), 'Accuracy:', round(accuracy_value, 2))

print(np.argmax(model(x_), axis=-1))
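
If you want to sanity-check the hand-written backward passes, a quick finite-difference comparison can be appended to the script. The sketch below is my addition, not part of the original gist (the helper name check_fc_gradient is hypothetical): it perturbs each weight of an FC layer and compares the numerical gradient of the proxy loss sum(output * dout) against the analytic dF/dW returned by backward(). The printed difference should be close to zero.

# Added sketch (not in the original gist): finite-difference check of FC's weight gradient.
def check_fc_gradient(eps=1e-5, seed=0):
    rng = np.random.default_rng(seed)
    fc = FC(4, 3)
    x = rng.normal(size=(2, 4))
    dout = rng.normal(size=(2, 3))

    fc(x)                         # forward pass fills fc.cache['x']
    _, grads = fc.backward(dout)  # analytic dF/dW, averaged over the batch
    analytic = grads['w']

    numeric = np.zeros_like(fc.w)
    for i in range(fc.w.shape[0]):
        for j in range(fc.w.shape[1]):
            old = fc.w[i, j]
            fc.w[i, j] = old + eps
            plus = np.sum(fc(x) * dout)
            fc.w[i, j] = old - eps
            minus = np.sum(fc(x) * dout)
            fc.w[i, j] = old
            # backward() divides by the batch size, so match that here.
            numeric[i, j] = (plus - minus) / (2 * eps) / x.shape[0]

    print('max abs diff:', np.max(np.abs(analytic - numeric)))

check_fc_gradient()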