Feed Forward Neural Net Classifier in Numpy
## ext requirements
import numpy as np
# - - - - - - - - - - - - - - - - - -
# -- Model --
# - - - - - - - - - - - - - - - - - -
## produces model outputs
def forward(params: dict, inputs: np.ndarray, hps: dict) -> list:
    hidden_act_raw = np.add(
        np.matmul(
            inputs,
            params['input']['hidden']['weights']
        ),
        params['input']['hidden']['bias']
    )
    hidden_act = hps['hidden_activation'](hidden_act_raw)
    output_act_raw = np.add(
        np.matmul(
            hidden_act,
            params['hidden']['output']['weights']
        ),
        params['hidden']['output']['bias'],
    )
    output_act = hps['output_activation'](output_act_raw)
    return [hidden_act_raw, hidden_act, output_act_raw, output_act]
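## Note (added for clarity): forward() returns, in order, the raw hidden-layer inputs, the hidden
## activations, the raw output-layer inputs, and the output activations; loss() and loss_grad()
## below rely on that ordering (the final element is the network's prediction).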
## mean squared error loss function
def loss(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict) -> float:
    return np.sum(
        np.square(
            np.subtract(
                forward(params, inputs, hps)[-1],
                targets
            )
        )
    ) / inputs.shape[0]
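## In symbols (added for clarity): loss = (1 / N) * sum over examples i and output nodes k of
## (output_ik - target_ik)^2, where N = inputs.shape[0]; i.e. summed squared error averaged over examples.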
## backprop (for sum squared error cost function)
def loss_grad(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict) -> dict:
    hidden_act_raw, hidden_act, output_act_raw, output_act = forward(params, inputs, hps)
    ## gradients for decode layer ( chain rule on cost function )
    decode_grad = np.multiply(
        hps['output_activation_deriv'](output_act_raw),
        (2 * (output_act - targets)) / inputs.shape[0] # <-- deriv of cost function
    )
    ## gradients for decode weights
    decode_grad_w = np.matmul(
        hidden_act.T,
        decode_grad
    )
    ## gradients for decode bias
    decode_grad_b = decode_grad.sum(axis = 0, keepdims = True)
    # - - - - - - - - - -
    ## gradients for encode layer ( chain rule on hidden layer )
    encode_grad = np.multiply(
        hps['hidden_activation_deriv'](hidden_act_raw),
        np.matmul(
            decode_grad,
            params['hidden']['output']['weights'].T
        )
    )
    ## gradients for encode weights
    encode_grad_w = np.matmul(
        inputs.T,
        encode_grad
    )
    ## gradients for encode bias
    encode_grad_b = encode_grad.sum(axis = 0, keepdims = True)
    return {
        'input': {
            'hidden': {
                'weights': encode_grad_w,
                'bias': encode_grad_b,
            }
        },
        'hidden': {
            'output': {
                'weights': decode_grad_w,
                'bias': decode_grad_b,
            }
        }
    }
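# - - - - - - - - - -
## Optional sanity check (not part of the original gist; the function name and epsilon value are
## illustrative choices): approximate the loss gradient with central finite differences and compare
## it against loss_grad(). The returned value should be very small if backprop is correct.
def numerical_grad_check(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict, eps: float = 1e-5) -> float:
    max_abs_diff = 0.0
    analytic = loss_grad(params, inputs, targets, hps)
    for layer in params:
        for connection in params[layer]:
            for kind in ('weights', 'bias'):
                matrix = params[layer][connection][kind]
                for idx in np.ndindex(matrix.shape):
                    original = matrix[idx]
                    matrix[idx] = original + eps # <-- nudge one parameter up
                    loss_plus = loss(params, inputs, targets, hps)
                    matrix[idx] = original - eps # <-- nudge it down
                    loss_minus = loss(params, inputs, targets, hps)
                    matrix[idx] = original # <-- restore it
                    numeric = (loss_plus - loss_minus) / (2 * eps)
                    max_abs_diff = max(max_abs_diff, abs(numeric - analytic[layer][connection][kind][idx]))
    return max_abs_diff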
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - -
# -- Convenience Functions --
# - - - - - - - - - - - - - - - - - -
def build_params(num_features: int, num_hidden_nodes: int, num_categories: int, weight_range: tuple = (-.1, .1)) -> dict:
    '''
    num_features <-- (int) number of features in the dataset
    num_hidden_nodes <-- (int) number of hidden-layer nodes
    num_categories <-- (int) number of output categories (one output node per category)
    weight_range = (-.1, .1) <-- (tuple of numeric) range for the uniform weight initialization
    '''
    return {
        'input': {
            'hidden': {
                'weights': np.random.uniform(*weight_range, [num_features, num_hidden_nodes]),
                'bias': np.random.uniform(*weight_range, [1, num_hidden_nodes]),
            },
        },
        'hidden': {
            'output': {
                'weights': np.random.uniform(*weight_range, [num_hidden_nodes, num_categories]),
                'bias': np.random.uniform(*weight_range, [1, num_categories]),
            }
        },
    }
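## Example (illustrative): build_params(2, 10, 2) returns nested dicts of arrays with shapes
## {'input': {'hidden': {'weights': (2, 10), 'bias': (1, 10)}},
##  'hidden': {'output': {'weights': (10, 2), 'bias': (1, 2)}}}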
def update_params(params: dict, gradients: dict, lr: float) -> dict:
    for layer in params:
        for connection in params[layer]:
            params[layer][connection]['weights'] -= lr * gradients[layer][connection]['weights']
            params[layer][connection]['bias'] -= lr * gradients[layer][connection]['bias']
    return params
def response(params: dict, inputs: np.ndarray, hps: dict) -> np.ndarray:
    return np.argmax(
        forward(params, inputs = inputs, hps = hps)[-1],
        axis = 1
    )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Run Model
if __name__ == '__main__':
    # make up a small toy dataset (two classes of two-feature examples)
    inputs = np.array([
        [.2, .3],
        [.3, .4],
        [.4, .5],
        [.5, .6],
        [.6, .7],
        [.7, .8],
        [.8, .9],
        [.2, .1],
        [.3, .2],
        [.4, .3],
        [.5, .4],
        [.6, .5],
        [.7, .6],
        [.8, .7],
    ])
    one_hot_labels = np.array([
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
    ])
    sigmoid = lambda x: 1 / (1 + np.exp(-x))
    hps = {
        'lr': .5, # <-- learning rate
        'wr': [-.1, .1], # <-- weight range
        'num_hidden_nodes': 10,
        'hidden_activation': sigmoid, # <-- sigmoid activation function
        'hidden_activation_deriv': lambda x: sigmoid(x) * (1 - sigmoid(x)), # <-- sigmoid activation function derivative
        'output_activation': sigmoid, # <-- sigmoid activation function
        'output_activation_deriv': lambda x: sigmoid(x) * (1 - sigmoid(x)), # <-- sigmoid activation function derivative
    }
    params = build_params(
        inputs.shape[1], # <-- num features
        hps['num_hidden_nodes'],
        one_hot_labels.shape[1], # <-- num categories
        weight_range = hps['wr'], # <-- use the weight range defined above
    )
    num_epochs = 1000
    print('loss initially: ', loss(params, inputs, one_hot_labels, hps))
    for epoch in range(num_epochs):
        gradients = loss_grad(params, inputs, one_hot_labels, hps)
        params = update_params(params, gradients, hps['lr'])
    print('loss after training: ', loss(params, inputs, one_hot_labels, hps))
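    ## Optional follow-up (not part of the original gist): report training accuracy by comparing
    ## the argmax of the network's outputs (via response()) against the argmax of the one-hot labels.
    predictions = response(params, inputs, hps)
    accuracy = np.mean(predictions == np.argmax(one_hot_labels, axis = 1))
    print('training accuracy: ', accuracy)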