@adriaciurana
Created March 18, 2020 00:19
MLP using only NumPy
import numpy as np
class Optimizer:
    def __init__(self, mu=0.9, lr=0.01):
        self.cache = {}  # running-average gradient, one entry per parameter name
        self.mu = mu     # smoothing factor
        self.lr = lr     # learning rate

    def update(self, name_w, old_w, dw):
        if name_w in self.cache:
            self.cache[name_w] = self.cache[name_w] * self.mu + dw * (1 - self.mu)
        else:
            self.cache[name_w] = dw
        return old_w - self.lr * self.cache[name_w]
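# Note (not in the original gist): the update above keeps an exponential moving
# average of the gradients, m <- mu*m + (1 - mu)*dw, and then steps the weight
# with w <- w - lr*m, i.e. a momentum-style smoothed SGD keyed by parameter name.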
class Layer:
    COUNTER = 0  # used to give every layer instance a unique id

    def __init__(self):
        self.id = Layer.COUNTER
        Layer.COUNTER += 1
        self.cache = {}  # values saved in the forward pass for the backward pass

    def backward(self, dout):
        # Default: identity layer with no trainable weights.
        return dout, {}

    def update(self, dout):
        # Backpropagate, then let the module-level optimizer update each weight.
        din, weights = self.backward(dout)
        for k, dw in weights.items():
            old_value = getattr(self, k)
            new_value = optimizer.update(str(self.id) + "_" + k, old_value, dw)
            setattr(self, k, new_value)
        return din
class FC(Layer):
    def __init__(self, input_size, output_size, add_bias=True):
        super().__init__()
        # Small random init: N(0, 1/sqrt(2)) scaled down by the layer size.
        self.w = np.random.normal(loc=0.0, scale=1 / np.sqrt(2), size=(input_size, output_size))
        self.w /= input_size
        if add_bias:
            self.b = np.random.normal(loc=0.0, scale=1 / np.sqrt(2), size=(output_size,))
            self.b /= output_size
        else:
            self.b = None

    def __call__(self, x):
        # W: Batch x Input X Input x Output = Batch x Output
        # B: Batch x Output + Output
        self.cache = {
            'x': x
        }
        if self.b is not None:
            return (x @ self.w) + self.b
        return x @ self.w

    def backward(self, dout):
        # dout: Batch x Output
        batch_size = dout.shape[0]
        # dF/din: Batch x Output X Output x Input = Batch x Input
        dF_din = dout @ self.w.T
        # dF/dW: Input x Batch X Batch x Output = Input x Output (averaged over the batch)
        dF_dW = (self.cache['x'].T @ dout) / batch_size
        # dF/dB: 1 x Batch X Batch x Output = 1 x Output (averaged over the batch)
        # dF_dB = np.ones((1, batch_size)).dot(dout).flatten()
        dF_dB = np.mean(dout, axis=0)
        return dF_din, {'w': dF_dW, 'b': dF_dB}
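# Note (not in the original gist): for y = x @ W + b with upstream gradient
# dL/dy, the identities used above are dL/dx = dL/dy @ W.T,
# dL/dW = x.T @ dL/dy (averaged over the batch here), and dL/db = the mean of
# dL/dy over the batch.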
class ReLU(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        mask = x > 0
        self.cache['mask'] = mask
        return mask * x

    def backward(self, dout):
        # dF/din: Batch x Input o Batch x Input = Batch x Input
        return dout * self.cache['mask'], {}
class Sigmoid(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        output = 1. / (1. + np.exp(-x))
        self.cache['sigmoid'] = output
        return output

    def backward(self, dout):
        # dF/din: Batch x Input o Batch x Input = Batch x Input
        return dout * (self.cache['sigmoid'] * (1 - self.cache['sigmoid'])), {}
class Softmax(Layer):
    def __init__(self):
        super().__init__()

    def __call__(self, x):
        # Subtract the per-row max for numerical stability (softmax is shift-invariant).
        max_v = np.max(x, axis=1, keepdims=True)
        value_exp = np.exp(x - max_v)
        softmax_value = value_exp / (np.sum(value_exp, axis=1, keepdims=True) + 1e-10)
        self.cache['softmax'] = softmax_value
        return softmax_value

    def backward(self, dout):
        # dout: Batch x Output
        # dF/din: Batch x Output X Output x Input = Batch x Input
        softmax_value = self.cache['softmax']
        outputs = []
        for i in range(softmax_value.shape[0]):
            softmax_value_single = softmax_value[i]
            sisi = softmax_value_single * (1 - softmax_value_single)
            sisj = - softmax_value_single.reshape(-1, 1) @ softmax_value_single.reshape(1, -1)
            din_single = np.empty((dout.shape[1], dout.shape[1]))
            mask = np.eye(dout.shape[1], dtype=bool)  # np.bool is removed in recent NumPy
            din_single[mask] = sisi
            din_single[~mask] = sisj[~mask]
            outputs.append(dout[i].reshape(1, -1) @ din_single)
        return np.concatenate(outputs, axis=0), {}
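# Note (not in the original gist): the loop above materialises the softmax
# Jacobian for each sample, ds_i/dx_j = s_i * (delta_ij - s_j): the diagonal
# entries are s_i * (1 - s_i) (sisi) and the off-diagonal entries are -s_i * s_j
# (sisj); dout[i] is then multiplied by this matrix to get dL/dx for sample i.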
class BinaryCrossEntropy:
    def __init__(self):
        super().__init__()

    def __call__(self, x, y):
        self.cache = {
            'x': x,
            'y': y
        }
        return np.mean(- y * np.log(x + 1e-10) - (1 - y) * np.log(1 - x + 1e-10))

    def backward(self):
        return - (self.cache['y'] / (self.cache['x'] + 1e-10)) + ((1 - self.cache['y']) / (1 - self.cache['x'] + 1e-10))
class CrossEntropy:
    def __init__(self):
        super().__init__()

    def __call__(self, x, y):
        self.cache = {
            'x': x,
            'y': y
        }
        return np.mean(- y * np.log(x + 1e-10))

    def backward(self):
        return - self.cache['y'] / (self.cache['x'] + 1e-10)
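# Note (not in the original gist): when CrossEntropy follows the Softmax layer,
# chaining its gradient -y/s through the softmax Jacobian s_i * (delta_ij - s_j)
# simplifies, for a one-hot target y, to (s - y) per sample, which is why many
# frameworks fuse the two operations. Here the two backward passes are kept
# separate; also note that backward omits the 1/(batch*classes) factor of the
# np.mean used in the forward pass, which only rescales the effective learning rate.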
class MultiClassAccuracy:
    def __init__(self):
        pass

    def __call__(self, x, y):
        # Fraction of samples whose predicted class matches the one-hot target.
        return np.mean(np.argmax(x, axis=-1) == np.argmax(y, axis=-1))
class MyNet:
    def __init__(self):
        self.fc1 = FC(5, 10)
        self.fc1_relu = ReLU()
        self.fc2 = FC(10, 5)
        self.fc2_softmax = Softmax()

    def __call__(self, x):
        x = self.fc1(x)
        x = self.fc1_relu(x)
        x = self.fc2(x)
        x = self.fc2_softmax(x)
        return x

    def update(self, dout):
        # Backpropagate through the layers in reverse order, updating weights as we go.
        dout = self.fc2_softmax.update(dout)
        dout = self.fc2.update(dout)
        dout = self.fc1_relu.update(dout)
        dout = self.fc1.update(dout)
        return dout
# The optimizer is used as a module-level global by Layer.update.
optimizer = Optimizer()
model = MyNet()
loss = CrossEntropy()  # BinaryCrossEntropy()
accuracy = MultiClassAccuracy()

# Toy problem: map the 5 one-hot inputs to the reversed one-hot targets.
x_ = np.eye(5)
y_ = np.eye(5)[::-1, :]
print(x_)
print(y_)

for i in range(15000):
    output = model(x_)
    loss_value = loss(output, y_)
    accuracy_value = accuracy(output, y_)
    model.update(loss.backward())
    if i % 100 == 0:
        print('Loss:', round(loss_value, 2), 'Accuracy:', round(accuracy_value, 2))

print(np.argmax(model(x_), axis=-1))
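
# --- Not part of the original gist ---------------------------------------
# A minimal finite-difference sketch that checks FC.backward, assuming the FC
# class defined above: it compares the analytic input gradient of
# L = sum(FC(x)) with a central-difference estimate. Uncomment the call to run it.
def _grad_check_fc(eps=1e-5):
    rng = np.random.default_rng(0)
    fc = FC(4, 3)
    x = rng.normal(size=(2, 4))
    out = fc(x)
    dout = np.ones_like(out)        # dL/d(out) for L = sum(out)
    din, _ = fc.backward(dout)      # analytic dL/dx
    num = np.zeros_like(x)
    for idx in np.ndindex(*x.shape):
        x_p, x_m = x.copy(), x.copy()
        x_p[idx] += eps
        x_m[idx] -= eps
        num[idx] = (np.sum(fc(x_p)) - np.sum(fc(x_m))) / (2 * eps)
    print('max |analytic - numeric| dL/dx:', np.max(np.abs(din - num)))

# _grad_check_fc()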