Simple 2-layer feedforward neural network from scratch with the brand-new SELU activation
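SELU's two constants are chosen so that, for roughly zero-mean, unit-variance inputs and LeCun-style weights (variance 1/fan_in), activations keep mean near 0 and variance near 1 as depth grows. A minimal sketch of that self-normalizing property, assuming the helper file below is saved as layers.py (the training script imports it under that name); the width, depth, and seed are arbitrary choices, not part of the original gist:

import numpy as np
from layers import selu

np.random.seed(0)
h = np.random.randn(1000, 256)  # zero-mean, unit-variance input batch
for depth in range(20):
    W = np.random.randn(256, 256) / np.sqrt(256)  # LeCun-style init: variance 1/fan_in
    h = selu(h.dot(W))
print('after 20 SELU layers: mean={:+.3f}, std={:.3f}'.format(h.mean(), h.std()))
# expect the mean to stay near 0 and the std near 1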
layers.py:
import numpy as np


def sigmoid(v):
    return 1 / (1 + np.exp(-v))

def one_hot(x):
    N = x.size
    D = x.max() + 1
    label = np.zeros((N, D))  # np.zeros takes the shape as a single tuple
    label[np.arange(N), x] = 1.
    return label

def selu(x):
    # SELU constants from Klambauer et al. (2017), "Self-Normalizing Neural Networks"
    alpha = 1.6732632423543772848170429916717
    lamb = 1.0507009873554804934193349852946
    return lamb * np.where(x > 0., x, alpha * np.exp(x) - alpha)


def selu_backward(x):
    # derivative of selu; expects the pre-activation as input
    alpha = 1.6732632423543772848170429916717
    lamb = 1.0507009873554804934193349852946
    return lamb * np.where(x > 0., 1., alpha * np.exp(x))

def softmax(logits):
    e = np.exp(logits - logits.max(axis=1, keepdims=True))  # shift logits for numerical stability
    return e / np.expand_dims(e.sum(axis=1), axis=1)

def get_data_batch(data, label, batch_size):
    # sample a mini-batch without replacement
    N = data.shape[0]
    mask = np.random.choice(N, batch_size, replace=False)
    return data[mask, :], label[mask, :]


def loss(prob, label):
    # cross-entropy: mean over the batch, sum over classes
    ce = -label * np.log(prob)
    return ce.mean(axis=0).sum()

def conv_forward(x, w, b, stride, pad):
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    H_out = 1 + (H + 2 * pad - HH) // stride
    W_out = 1 + (W + 2 * pad - WW) // stride
    out = np.empty([N, F, H_out, W_out])
    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), 'constant',
                   constant_values=0)
    for u in xrange(H_out):
        for v in xrange(W_out):
            # patch.shape = N, C, HH, WW
            patch = x_pad[:, :, stride * u:stride * u + HH, stride * v:stride * v + WW]
            # im2col-style: flatten each patch and each filter, then one matrix multiply per location
            out[:, :, u, v] = np.dot(patch.reshape(patch.shape[0], -1),
                                     w.reshape(w.shape[0], -1).T) + b[None, :]
    return out

def conv_backward(dout, x, w, b, stride, pad):
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape  # F, C, HH, WW
    _, _, H_out, W_out = dout.shape  # N, F, H', W'
    dw, db = np.zeros_like(w), np.zeros_like(b)
    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), 'constant',
                   constant_values=0)
    dx = np.zeros_like(x_pad)
    for u in xrange(H_out):
        for v in xrange(W_out):
            patch = x_pad[:, :, stride * u:stride * u + HH, stride * v:stride * v + WW]
            dout_patch = dout[:, :, u, v]
            dx[:, :, stride * u:stride * u + HH, stride * v:stride * v + WW] += \
                np.dot(dout_patch, w.reshape(w.shape[0], -1)).reshape(N, C, HH, WW)
            dw += dout_patch.T.dot(patch.reshape(patch.shape[0], -1)).reshape(F, C, HH, WW)
            db += dout_patch.sum(axis=0).flatten()
    if pad > 0:  # strip the padding rows/columns from the input gradient
        dx = dx[:, :, pad:-pad, pad:-pad]
    return dx, dw, db
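
The convolution routines above are easy to get subtly wrong, so a numerical gradient check is a cheap sanity test. A minimal sketch, not part of the original gist, again assuming the file above is saved as layers.py; the shapes and seed are arbitrary:

import numpy as np
from layers import conv_forward, conv_backward

np.random.seed(0)
x = np.random.randn(2, 3, 5, 5)
w = np.random.randn(4, 3, 3, 3)
b = np.random.randn(4)
stride, pad = 1, 1

out = conv_forward(x, w, b, stride, pad)
dout = np.random.randn(*out.shape)
dx, dw, db = conv_backward(dout, x, w, b, stride, pad)

def numerical_grad(f, arr, dout, eps=1e-6):
    # central differences: perturb one entry of arr at a time and re-run the forward pass
    grad = np.zeros_like(arr)
    it = np.nditer(arr, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        old = arr[idx]
        arr[idx] = old + eps
        pos = f()
        arr[idx] = old - eps
        neg = f()
        arr[idx] = old
        grad[idx] = np.sum((pos - neg) * dout) / (2 * eps)
        it.iternext()
    return grad

forward = lambda: conv_forward(x, w, b, stride, pad)
for name, analytic, arr in [('dx', dx, x), ('dw', dw, w), ('db', db, b)]:
    num = numerical_grad(forward, arr, dout)
    print('{}: max abs error = {:.2e}'.format(name, np.abs(analytic - num).max()))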

The second file in the gist is the training script; it imports the helpers above from layers.py and trains the 2-layer network on MNIST:

import numpy as np

from layers import selu, selu_backward, softmax, get_data_batch, loss

np.random.seed(1234)  # Quality control

def net(x, params):
    W1, W2, b1, b2 = params
    # h = sigmoid(x.dot(W1) + b1)
    a = x.dot(W1) + b1
    # h[h < 0] *= 0.1  # Leaky ReLU
    h = selu(a)
    out = softmax(h.dot(W2) + b2)
    # cache both the pre-activation (needed by selu_backward) and the hidden activation
    return out, (a, h)


def gradient(dout, x, params, cache):
    W1, W2, b1, b2 = params
    a, h = cache
    db2 = dout.sum(axis=0)[None, :]
    dW2 = h.T.dot(dout)
    dh = dout.dot(W2.T)
    # dh *= h * (1 - h)  # sigmoid
    # dh[h < 0] *= 0.1   # Leaky ReLU
    dh *= selu_backward(a)  # the SELU derivative takes the pre-activation, not h
    db1 = dh.sum(axis=0)[None, :]
    dW1 = x.T.dot(dh)
    return dW1, dW2, db1, db2

def train():
    import os
    from keras.datasets import mnist
    from keras.utils.np_utils import to_categorical
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    y_train, y_test = to_categorical(y_train, 10), to_categorical(y_test, 10)
    X_train, X_test = X_train / 255., X_test / 255.
    X_train, X_test = X_train.reshape(-1, 784), X_test.reshape(-1, 784)
    # shapes of W1, W2, b1, b2
    params_shapes = [[784, 256], [256, 10], [1, 256], [1, 10]]
    params = [0.01 * np.random.randn(h, w) for h, w in params_shapes]
    grad_cache = [np.zeros(shape) for shape in params_shapes]  # momentum (velocity) buffers
    lr = 1e-3
    gamma = 0.9
    for i in xrange(5000):
        X, y = get_data_batch(X_train, y_train, 128)
        prob, cache = net(X, params)
        # Nesterov: evaluate the gradient at the lookahead point params - gamma * velocity
        grad = gradient(prob - y, X, [p - gamma * v for p, v in zip(params, grad_cache)], cache)
        # grad = gradient(prob - y, X, params, cache)  # plain momentum
        for g, gc in zip(grad, grad_cache):
            gc *= gamma
            gc += lr * g
        if i % 100 == 0:
            print 'Loss: {:.5f}'.format(loss(prob, y))
        for w, d in zip(params, grad_cache):
            w -= d
    prob, _ = net(X_test, params)
    print 'Validation Accuracy: {:.4f}'.format(np.mean(prob.argmax(axis=1) == y_test.argmax(axis=1)))
    if not os.path.isdir('saved_networks/np_weights'):
        os.makedirs('saved_networks/np_weights')  # make sure the output directory exists
    for i, w in enumerate(params):
        np.save('saved_networks/np_weights/{}.npy'.format(i), w)


if __name__ == '__main__':
    train()
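
For completeness, a small inference sketch, not part of the original gist: it reloads the weights that train() saved and re-computes test accuracy. It assumes it is appended to the training script above so that net() is in scope; evaluate_saved is a hypothetical helper name.

def evaluate_saved():
    from keras.datasets import mnist
    # weights were saved in the order W1, W2, b1, b2
    params = [np.load('saved_networks/np_weights/{}.npy'.format(i)) for i in range(4)]
    (_, _), (X_test, y_test) = mnist.load_data()  # y_test stays as integer labels here
    X_test = (X_test / 255.).reshape(-1, 784)
    prob, _ = net(X_test, params)
    print('Test accuracy: {:.4f}'.format(np.mean(prob.argmax(axis=1) == y_test)))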