MLP using only NumPy
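The script below implements a small multilayer perceptron from scratch in NumPy: a fully connected layer, ReLU/sigmoid/softmax activations with hand-written backward passes, binary and categorical cross-entropy losses, and an SGD-with-momentum optimizer. As a smoke test, a 5-10-5 network is trained to map the 5x5 identity matrix onto its row-reversed counterpart, i.e. to learn a fixed permutation of five one-hot classes.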
import numpy as np


class Optimizer:
    # SGD with momentum, kept as an exponential moving average of the gradients.
    def __init__(self, mu=0.9, lr=0.01):
        self.cache = {}  # per-parameter running average of gradients
        self.mu = mu     # momentum coefficient
        self.lr = lr     # learning rate

    def update(self, name_w, old_w, dw):
        # v <- mu * v + (1 - mu) * dw (the first gradient seeds the average)
        if name_w in self.cache:
            self.cache[name_w] = self.cache[name_w] * self.mu + dw * (1 - self.mu)
        else:
            self.cache[name_w] = dw
        return old_w - self.lr * self.cache[name_w]

class Layer:
    COUNTER = 0  # gives each layer a unique id so the optimizer can key its cache

    def __init__(self):
        self.id = Layer.COUNTER
        Layer.COUNTER += 1
        self.cache = {}

    def backward(self, dout):
        # Default: identity layer with no trainable weights.
        return dout, {}

    def update(self, dout):
        # Backpropagate through the layer, then let the module-level optimizer
        # update each trainable weight returned by backward().
        din, weights = self.backward(dout)
        for k, dw in weights.items():
            old_value = getattr(self, k)
            new_value = optimizer.update(str(self.id) + "_" + k, old_value, dw)
            setattr(self, k, new_value)
        return din

class FC(Layer):
    def __init__(self, input_size, output_size, add_bias=True):
        super().__init__()
        self.w = np.random.normal(loc=0.0, scale=1 / np.sqrt(2), size=(input_size, output_size))
        self.w /= input_size
        if add_bias:
            self.b = np.random.normal(loc=0.0, scale=1 / np.sqrt(2), size=(output_size,))
            self.b /= output_size
        else:
            self.b = None

    def __call__(self, x):
        # W: (Batch x Input) x (Input x Output) = Batch x Output
        # B: (Batch x Output) + Output (broadcast)
        self.cache = {
            'x': x
        }
        if self.b is not None:
            return (x @ self.w) + self.b
        return x @ self.w

    def backward(self, dout):
        # dout: Batch x Output
        batch_size = dout.shape[0]
        # dF/din: (Batch x Output) x (Output x Input) = Batch x Input
        dF_din = dout @ self.w.T
        # dF/dW: (Input x Batch) x (Batch x Output) = Input x Output
        dF_dW = (self.cache['x'].T @ dout) / batch_size
        # dF/dB: (1 x Batch) x (Batch x Output) = 1 x Output
        # dF_dB = np.ones((1, batch_size)).dot(dout).flatten()
        dF_dB = np.mean(dout, axis=0)
        return dF_din, {'w': dF_dW, 'b': dF_dB}

class ReLU(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        mask = x > 0
        self.cache['mask'] = mask
        return mask * x

    def backward(self, dout):
        # dF/din: (Batch x Input) o (Batch x Input) = Batch x Input (elementwise)
        return dout * self.cache['mask'], {}


class Sigmoid(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        output = 1. / (1. + np.exp(-x))
        self.cache['sigmoid'] = output
        return output

    def backward(self, dout):
        # dF/din: (Batch x Input) o (Batch x Input) = Batch x Input (elementwise)
        return dout * (self.cache['sigmoid'] * (1 - self.cache['sigmoid'])), {}

class Softmax(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        # Subtract the per-row maximum for numerical stability.
        max_v = np.max(x, axis=1, keepdims=True)
        value_exp = np.exp(x - max_v)
        softmax_value = value_exp / (np.sum(value_exp, axis=1, keepdims=True) + 1e-10)
        self.cache['softmax'] = softmax_value
        return softmax_value

    def backward(self, dout):
        # dout: Batch x Output
        # dF/din: (Batch x Output) x (Output x Input) = Batch x Input
        # Build the per-sample Jacobian: s_i*(1 - s_i) on the diagonal, -s_i*s_j off it.
        softmax_value = self.cache['softmax']
        outputs = []
        for i in range(softmax_value.shape[0]):
            softmax_value_single = softmax_value[i]
            sisi = softmax_value_single * (1 - softmax_value_single)
            sisj = -softmax_value_single.reshape(-1, 1) @ softmax_value_single.reshape(1, -1)
            din_single = np.empty((dout.shape[1], dout.shape[1]))
            mask = np.eye(dout.shape[1], dtype=bool)  # np.bool is removed in recent NumPy
            din_single[mask] = sisi
            din_single[~mask] = sisj[~mask]
            outputs.append(dout[i].reshape(1, -1) @ din_single)
        return np.concatenate(outputs, axis=0), {}
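
# Added note (not in the original gist): the per-sample Jacobian assembled above is
#   d s_i / d x_j = s_i * (delta_ij - s_j),
# i.e. sisi = s_i * (1 - s_i) on the diagonal and sisj = -s_i * s_j off the diagonal;
# backward() then left-multiplies it by the incoming gradient row dout[i].
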
class BinaryCrossEntropy:
    def __init__(self):
        super().__init__()

    def __call__(self, x, y):
        self.cache = {
            'x': x,
            'y': y
        }
        return np.mean(-y * np.log(x + 1e-10) - (1 - y) * np.log(1 - x + 1e-10))

    def backward(self):
        return -(self.cache['y'] / (self.cache['x'] + 1e-10)) + ((1 - self.cache['y']) / (1 - self.cache['x'] + 1e-10))


class CrossEntropy:
    def __init__(self):
        super().__init__()

    def __call__(self, x, y):
        self.cache = {
            'x': x,
            'y': y
        }
        return np.mean(-y * np.log(x + 1e-10))

    def backward(self):
        return -self.cache['y'] / (self.cache['x'] + 1e-10)


class MultiClassAccuracy:
    def __init__(self):
        pass

    def __call__(self, x, y):
        return np.mean(np.argmax(x, axis=-1) == np.argmax(y, axis=-1))

class MyNet:
    def __init__(self):
        self.fc1 = FC(5, 10)
        self.fc1_relu = ReLU()
        self.fc2 = FC(10, 5)
        self.fc2_softmax = Softmax()

    def __call__(self, x):
        x = self.fc1(x)
        x = self.fc1_relu(x)
        x = self.fc2(x)
        x = self.fc2_softmax(x)
        return x

    def update(self, dout):
        # Backpropagate in reverse order of the forward pass.
        dout = self.fc2_softmax.update(dout)
        dout = self.fc2.update(dout)
        dout = self.fc1_relu.update(dout)
        dout = self.fc1.update(dout)
        return dout

optimizer = Optimizer()
model = MyNet()
loss = CrossEntropy()  # or BinaryCrossEntropy()
accuracy = MultiClassAccuracy()

# Toy task: map the 5x5 identity matrix onto its row-reversed counterpart.
x_ = np.eye(5)
y_ = np.eye(5)[::-1, :]
print(x_)
print(y_)

for i in range(15000):
    output = model(x_)
    loss_value = loss(output, y_)
    accuracy_value = accuracy(output, y_)
    model.update(loss.backward())
    if i % 100 == 0:
        print('Loss:', round(loss_value, 2), 'Accuracy:', round(accuracy_value, 2))

print(np.argmax(model(x_), axis=-1))
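
If you want to sanity-check the hand-written backward passes, a quick finite-difference comparison can be appended to the script. The sketch below is my addition, not part of the original gist (the helper name check_fc_gradient is hypothetical): it perturbs each weight of an FC layer and compares the numerical gradient of the proxy loss sum(output * dout) against the analytic dF/dW returned by backward(). The printed difference should be close to zero.

# Added sketch (not in the original gist): finite-difference check of FC's weight gradient.
def check_fc_gradient(eps=1e-5, seed=0):
    rng = np.random.default_rng(seed)
    fc = FC(4, 3)
    x = rng.normal(size=(2, 4))
    dout = rng.normal(size=(2, 3))

    fc(x)                         # forward pass fills fc.cache['x']
    _, grads = fc.backward(dout)  # analytic dF/dW, averaged over the batch
    analytic = grads['w']

    numeric = np.zeros_like(fc.w)
    for i in range(fc.w.shape[0]):
        for j in range(fc.w.shape[1]):
            old = fc.w[i, j]
            fc.w[i, j] = old + eps
            plus = np.sum(fc(x) * dout)
            fc.w[i, j] = old - eps
            minus = np.sum(fc(x) * dout)
            fc.w[i, j] = old
            # backward() divides by the batch size, so match that here.
            numeric[i, j] = (plus - minus) / (2 * eps) / x.shape[0]

    print('max abs diff:', np.max(np.abs(analytic - numeric)))

check_fc_gradient()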