Feed Forward Neural Net Classifier in Numpy
## ext requirements
import numpy as np
# - - - - - - - - - - - - - - - - - -
# -- Model --
# - - - - - - - - - - - - - - - - - -
## produces model outputs
def forward(params: dict, inputs: np.ndarray, hps: dict) -> list:
    hidden_act_raw = np.add(
        np.matmul(
            inputs,
            params['input']['hidden']['weights']
        ),
        params['input']['hidden']['bias']
    )
    hidden_act = hps['hidden_activation'](hidden_act_raw)
    output_act_raw = np.add(
        np.matmul(
            hidden_act,
            params['hidden']['output']['weights']
        ),
        params['hidden']['output']['bias'],
    )
    output_act = hps['output_activation'](output_act_raw)
    return [hidden_act_raw, hidden_act, output_act_raw, output_act]
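## Note (added for clarity): forward() returns, in order, the raw hidden-layer inputs, the hidden
## activations, the raw output-layer inputs, and the output activations; loss() and loss_grad()
## below rely on that ordering (the final element is the network's prediction).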
## mean squared error loss function
def loss(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict) -> float:
    return np.sum(
        np.square(
            np.subtract(
                forward(params, inputs, hps)[-1],
                targets
            )
        )
    ) / inputs.shape[0]
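## In symbols (added for clarity): loss = (1 / N) * sum over examples i and output nodes k of
## (output_ik - target_ik)^2, where N = inputs.shape[0]; i.e. summed squared error averaged over examples.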
## backprop (for sum squared error cost function)
def loss_grad(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict) -> dict:
    hidden_act_raw, hidden_act, output_act_raw, output_act = forward(params, inputs, hps)
    ## gradients for decode layer ( chain rule on cost function )
    decode_grad = np.multiply(
        hps['output_activation_deriv'](output_act_raw),
        (2 * (output_act - targets)) / inputs.shape[0] # <-- deriv of cost function
    )
    ## gradients for decode weights
    decode_grad_w = np.matmul(
        hidden_act.T,
        decode_grad
    )
    ## gradients for decode bias
    decode_grad_b = decode_grad.sum(axis = 0, keepdims = True)
    # - - - - - - - - - -
    ## gradients for encode layer ( chain rule on hidden layer )
    encode_grad = np.multiply(
        hps['hidden_activation_deriv'](hidden_act_raw),
        np.matmul(
            decode_grad,
            params['hidden']['output']['weights'].T
        )
    )
    ## gradients for encode weights
    encode_grad_w = np.matmul(
        inputs.T,
        encode_grad
    )
    ## gradients for encode bias
    encode_grad_b = encode_grad.sum(axis = 0, keepdims = True)
    return {
        'input': {
            'hidden': {
                'weights': encode_grad_w,
                'bias': encode_grad_b,
            }
        },
        'hidden': {
            'output': {
                'weights': decode_grad_w,
                'bias': decode_grad_b,
            }
        }
    }
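# - - - - - - - - - -
## Optional sanity check (not part of the original gist; the function name and epsilon value are
## illustrative choices): approximate the loss gradient with central finite differences and compare
## it against loss_grad(). The returned value should be very small if backprop is correct.
def numerical_grad_check(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict, eps: float = 1e-5) -> float:
    max_abs_diff = 0.0
    analytic = loss_grad(params, inputs, targets, hps)
    for layer in params:
        for connection in params[layer]:
            for kind in ('weights', 'bias'):
                matrix = params[layer][connection][kind]
                for idx in np.ndindex(matrix.shape):
                    original = matrix[idx]
                    matrix[idx] = original + eps # <-- nudge one parameter up
                    loss_plus = loss(params, inputs, targets, hps)
                    matrix[idx] = original - eps # <-- nudge it down
                    loss_minus = loss(params, inputs, targets, hps)
                    matrix[idx] = original # <-- restore it
                    numeric = (loss_plus - loss_minus) / (2 * eps)
                    max_abs_diff = max(max_abs_diff, abs(numeric - analytic[layer][connection][kind][idx]))
    return max_abs_diff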
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - -
# -- Convenience Functions --
# - - - - - - - - - - - - - - - - - -
def build_params(num_features: int, num_hidden_nodes: int, num_categories: int, weight_range: tuple = (-.1, .1)) -> dict:
    '''
    num_features <-- (int) number of features in the dataset
    num_hidden_nodes <-- (int) number of hidden-layer nodes
    num_categories <-- (int) number of output categories (one output node per category)
    weight_range = (-.1, .1) <-- (tuple of numeric) range for the uniform weight initialization
    '''
    return {
        'input': {
            'hidden': {
                'weights': np.random.uniform(*weight_range, [num_features, num_hidden_nodes]),
                'bias': np.random.uniform(*weight_range, [1, num_hidden_nodes]),
            },
        },
        'hidden': {
            'output': {
                'weights': np.random.uniform(*weight_range, [num_hidden_nodes, num_categories]),
                'bias': np.random.uniform(*weight_range, [1, num_categories]),
            }
        },
    }
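## Example (illustrative): build_params(2, 10, 2) returns nested dicts of arrays with shapes
## {'input': {'hidden': {'weights': (2, 10), 'bias': (1, 10)}},
##  'hidden': {'output': {'weights': (10, 2), 'bias': (1, 2)}}}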
def update_params(params: dict, gradients: dict, lr: float) -> dict:
    for layer in params:
        for connection in params[layer]:
            params[layer][connection]['weights'] -= lr * gradients[layer][connection]['weights']
            params[layer][connection]['bias'] -= lr * gradients[layer][connection]['bias']
    return params
def response(params: dict, inputs: np.ndarray, hps: dict) -> np.ndarray:
    return np.argmax(
        forward(params, inputs = inputs, hps = hps)[-1],
        axis = 1
    )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Run Model
if __name__ == '__main__':
    # make up a small toy dataset (two classes of two-feature examples)
    inputs = np.array([
        [.2, .3],
        [.3, .4],
        [.4, .5],
        [.5, .6],
        [.6, .7],
        [.7, .8],
        [.8, .9],
        [.2, .1],
        [.3, .2],
        [.4, .3],
        [.5, .4],
        [.6, .5],
        [.7, .6],
        [.8, .7],
    ])
    one_hot_labels = np.array([
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
    ])
    sigmoid = lambda x: 1 / (1 + np.exp(-x))
    hps = {
        'lr': .5, # <-- learning rate
        'wr': [-.1, .1], # <-- weight range
        'num_hidden_nodes': 10,
        'hidden_activation': sigmoid, # <-- sigmoid activation function
        'hidden_activation_deriv': lambda x: sigmoid(x) * (1 - sigmoid(x)), # <-- sigmoid activation function derivative
        'output_activation': sigmoid, # <-- sigmoid activation function
        'output_activation_deriv': lambda x: sigmoid(x) * (1 - sigmoid(x)), # <-- sigmoid activation function derivative
    }
    params = build_params(
        inputs.shape[1], # <-- num features
        hps['num_hidden_nodes'],
        one_hot_labels.shape[1], # <-- num categories
        weight_range = hps['wr'], # <-- use the weight range defined above
    )
    num_epochs = 1000
    print('loss initially: ', loss(params, inputs, one_hot_labels, hps))
    for epoch in range(num_epochs):
        gradients = loss_grad(params, inputs, one_hot_labels, hps)
        params = update_params(params, gradients, hps['lr'])
    print('loss after training: ', loss(params, inputs, one_hot_labels, hps))
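    ## Optional follow-up (not part of the original gist): report training accuracy by comparing
    ## the argmax of the network's outputs (via response()) against the argmax of the one-hot labels.
    predictions = response(params, inputs, hps)
    accuracy = np.mean(predictions == np.argmax(one_hot_labels, axis = 1))
    print('training accuracy: ', accuracy)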