Created
August 22, 2018 10:08
-
-
Save varun19299/d2047e37d6e45d7b21b3091f23acb9f1 to your computer and use it in GitHub Desktop.
Forward_prop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^-x), applied elementwise."""
    return 1.0 / (1.0 + np.exp(-x))
def sigmoid_grad(x):
    """Sigmoid derivative expressed in terms of the activated output x.

    For a = sigmoid(z), d sigmoid/dz = a * (1 - a); callers pass the
    already-activated value, not the pre-activation.
    """
    return x * (1 - x)
def relu(x):
    """Rectified linear unit: elementwise max(x, 0)."""
    return np.maximum(x, 0)
def tanh(x):
    """Hyperbolic tangent activation, applied elementwise.

    Uses np.tanh directly instead of the original 2*sigmoid(2*x) - 1
    identity: mathematically equivalent, but np.tanh avoids the
    np.exp overflow warnings of the sigmoid route for large |x| and
    removes the dependency on the sibling sigmoid helper.
    """
    return np.tanh(x)
def tanh_grad(x):
    """Tanh derivative expressed in terms of the activated output x.

    Bug fix: the original computed 1 - tanh(x)**2, i.e. it expected the
    pre-activation, but every call site in this file passes the already
    tanh-activated hidden layer (mirroring sigmoid_grad, which also takes
    the activated output), so tanh was being applied twice.  For
    a = tanh(z), d tanh/dz = 1 - a**2.
    """
    return 1 - x ** 2
# function to train a three layer neural net with RELU, sigmoid or tanh
# nonlinearity via vanilla gradient descent
def three_layer_net(NONLINEARITY, X, y, model, step_size, reg):
    """Train a 3-layer (two hidden layers + softmax) net with vanilla GD.

    Parameters
    ----------
    NONLINEARITY : str
        Hidden-layer activation: 'RELU', 'SIGM' or 'TANH'.
    X : ndarray, shape (N, D)
        Training inputs, one example per row.
    y : ndarray, shape (N,)
        Integer class labels indexing the columns of the score matrix.
    model : dict
        Initial parameters 'W1', 'W2', 'W3', 'b1', 'b2', 'b3' and hidden
        sizes 'h', 'h2'.  The weight/bias arrays are updated in place.
    step_size : float
        Learning rate for the gradient-descent updates.
    reg : float
        L2 regularization strength.

    Returns
    -------
    tuple
        (plot_array_1, plot_array_2, W1, W2, W3, b1, b2, b3): the
        per-iteration mean |dW1| and |dW2| magnitudes (used to visualize
        gradient flow) followed by the trained parameters.

    Raises
    ------
    ValueError
        If NONLINEARITY is not one of the supported activations.
        (The original silently left `scores` undefined in that case.)
    """
    if NONLINEARITY not in ('RELU', 'SIGM', 'TANH'):
        raise ValueError("NONLINEARITY must be 'RELU', 'SIGM' or 'TANH'")

    # parameter initialization
    h = model['h']
    h2 = model['h2']
    W1 = model['W1']
    W2 = model['W2']
    W3 = model['W3']
    b1 = model['b1']
    b2 = model['b2']
    b3 = model['b3']

    num_examples = X.shape[0]
    plot_array_1 = []  # mean |dW1| per iteration
    plot_array_2 = []  # mean |dW2| per iteration

    # gradient descent loop
    for i in range(50000):
        # ---- forward pass ----
        if NONLINEARITY == 'RELU':
            hidden_layer = relu(np.dot(X, W1) + b1)
            hidden_layer2 = relu(np.dot(hidden_layer, W2) + b2)
        elif NONLINEARITY == 'SIGM':
            hidden_layer = sigmoid(np.dot(X, W1) + b1)
            hidden_layer2 = sigmoid(np.dot(hidden_layer, W2) + b2)
        else:  # 'TANH'
            hidden_layer = tanh(np.dot(X, W1) + b1)
            hidden_layer2 = tanh(np.dot(hidden_layer, W2) + b2)
        scores = np.dot(hidden_layer2, W3) + b3

        # softmax probabilities
        exp_scores = np.exp(scores)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)  # [N x K]

        # loss: average cross-entropy plus L2 regularization
        correct_logprobs = -np.log(probs[range(num_examples), y])
        data_loss = np.sum(correct_logprobs) / num_examples
        reg_loss = 0.5 * reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2) + 0.5 * reg * np.sum(W3 * W3)
        loss = data_loss + reg_loss
        if i % 1000 == 0:
            print("iteration : " + str(i) + " loss : " + str(loss))

        # gradient on the scores (softmax + cross-entropy combined)
        dscores = probs
        dscores[range(num_examples), y] -= 1
        dscores /= num_examples

        # ---- backward pass ----
        dW3 = np.dot(hidden_layer2.T, dscores)
        db3 = np.sum(dscores, axis=0, keepdims=True)
        if NONLINEARITY == 'RELU':
            # backprop ReLU nonlinearity: kill gradient where activation was 0
            dhidden2 = np.dot(dscores, W3.T)
            dhidden2[hidden_layer2 <= 0] = 0
            dW2 = np.dot(hidden_layer.T, dhidden2)
            plot_array_2.append(np.sum(np.abs(dW2)) / np.sum(np.abs(dW2.shape)))
            db2 = np.sum(dhidden2, axis=0)
            dhidden = np.dot(dhidden2, W2.T)
            dhidden[hidden_layer <= 0] = 0
        elif NONLINEARITY == 'SIGM':
            # backprop sigmoid nonlinearity (grad helpers take the
            # activated output, not the pre-activation)
            dhidden2 = np.dot(dscores, W3.T) * sigmoid_grad(hidden_layer2)
            dW2 = np.dot(hidden_layer.T, dhidden2)
            plot_array_2.append(np.sum(np.abs(dW2)) / np.sum(np.abs(dW2.shape)))
            db2 = np.sum(dhidden2, axis=0)
            dhidden = np.dot(dhidden2, W2.T) * sigmoid_grad(hidden_layer)
        else:  # 'TANH'
            # backprop tanh nonlinearity (tanh_grad is fed the activated output)
            dhidden2 = np.dot(dscores, W3.T) * tanh_grad(hidden_layer2)
            dW2 = np.dot(hidden_layer.T, dhidden2)
            plot_array_2.append(np.sum(np.abs(dW2)) / np.sum(np.abs(dW2.shape)))
            db2 = np.sum(dhidden2, axis=0)
            dhidden = np.dot(dhidden2, W2.T) * tanh_grad(hidden_layer)
        dW1 = np.dot(X.T, dhidden)
        plot_array_1.append(np.sum(np.abs(dW1)) / np.sum(np.abs(dW1.shape)))
        db1 = np.sum(dhidden, axis=0)

        # add regularization gradient
        dW3 += reg * W3
        dW2 += reg * W2
        dW1 += reg * W1

        # option to return loss, grads -- uncomment next comment
        grads = {}
        grads['W1'] = dW1
        grads['W2'] = dW2
        grads['W3'] = dW3
        grads['b1'] = db1
        grads['b2'] = db2
        grads['b3'] = db3
        # return loss, grads

        # parameter update
        W1 += -step_size * dW1
        b1 += -step_size * db1
        W2 += -step_size * dW2
        b2 += -step_size * db2
        W3 += -step_size * dW3
        b3 += -step_size * db3

    # evaluate training set accuracy with the FINAL parameters.
    # Bug fix: the original had no 'TANH' branch here, so for tanh it
    # silently reused activations computed before the last weight update.
    if NONLINEARITY == 'RELU':
        hidden_layer = relu(np.dot(X, W1) + b1)
        hidden_layer2 = relu(np.dot(hidden_layer, W2) + b2)
    elif NONLINEARITY == 'SIGM':
        hidden_layer = sigmoid(np.dot(X, W1) + b1)
        hidden_layer2 = sigmoid(np.dot(hidden_layer, W2) + b2)
    else:  # 'TANH'
        hidden_layer = tanh(np.dot(X, W1) + b1)
        hidden_layer2 = tanh(np.dot(hidden_layer, W2) + b2)
    scores = np.dot(hidden_layer2, W3) + b3
    predicted_class = np.argmax(scores, axis=1)
    print("training accuracy:" + str(np.mean(predicted_class == y)))
    # return cost, grads
    return plot_array_1, plot_array_2, W1, W2, W3, b1, b2, b3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment