Created
March 13, 2018 09:10
-
-
Save Z30G0D/a9065d3277c1d70c62aa98c8513d4dbd to your computer and use it in GitHub Desktop.
The 4th exercise from the machine learning course by Andrew Ng, including feed forward
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Hey all!\n", | |
"## Coursera's Machine Learning course - Exercise 4\n", | |
"<br>\n", | |
"This is the 4th exercise of the course, which covers building a neural network from scratch.<br>\n", | |
"As always, please send your remarks to my mail: tomer@nahshoh.net" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"from scipy.io import loadmat\n", | |
"from sklearn.preprocessing import OneHotEncoder\n", | |
"from scipy.optimize import minimize" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's load the data:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read the exercise's MATLAB file and pull out the pixel matrix and the label column.\n", | |
"data = loadmat('ex4data1.mat')\n", | |
"X, y = data['X'], data['y']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Turn the y vector (digits from 0-9) into a one-hot encoding (a 10 x 1 vector with a '1' in the position of the label)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old name,\n", | |
"# so try the new keyword first and fall back to the old one on older installations.\n", | |
"try:\n", | |
"    encoder = OneHotEncoder(sparse_output=False)\n", | |
"except TypeError:\n", | |
"    encoder = OneHotEncoder(sparse=False)\n", | |
"# Dense indicator matrix with one row per example and one column per distinct label in y.\n", | |
"y_encoder = encoder.fit_transform(y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((5000, 400), (5000, 10))" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X.shape,y_encoder.shape" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Ok, so we have 5000 pictures of 20x20 pixels and their corresponding label vectors.<br>\n", | |
"Our network will have 3 layers (an input layer, a hidden layer and an output layer).<br>\n", | |
"Let's load the already-trained weights." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Pre-trained weight matrices shipped with the exercise, one per layer.\n", | |
"theta = loadmat('ex4weights.mat')\n", | |
"theta1, theta2 = theta['Theta1'], theta['Theta2']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Calculating the cost function according to ex4.pdf; the code used here is attached to the original zip\n", | |
"# file provided by Coursera.\n", | |
"\n", | |
"def sigmoid(z):\n", | |
"    \"\"\"Return the element-wise logistic function 1 / (1 + e^(-z)) of z.\"\"\"\n", | |
"    denominator = 1 + np.exp(-z)\n", | |
"    return 1 / denominator" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def forward_propagate(X, theta1, theta2):\n", | |
"    \"\"\"Run one forward pass through the 3-layer network.\n", | |
"\n", | |
"    Matrix products use the `@` operator, so the function gives the same\n", | |
"    result for np.matrix inputs as before and also works for plain ndarrays\n", | |
"    (with `*`, ndarrays would be multiplied element-wise instead).\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    X : 2-D array or matrix, one example per row, without a bias column.\n", | |
"    theta1 : weights of the hidden layer; its transpose is applied to the biased input.\n", | |
"    theta2 : weights of the output layer; its transpose is applied to the biased hidden activations.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    (a1, z2, a2, z3, h) : biased input, hidden pre-activation, biased hidden\n", | |
"    activation, output pre-activation and the network output (hypothesis).\n", | |
"    \"\"\"\n", | |
"    # number of examples\n", | |
"    m = X.shape[0]\n", | |
"    # a1 is the network input with a bias column of ones prepended\n", | |
"    a1 = np.insert(X, 0, values=np.ones(m), axis=1)\n", | |
"    # hidden layer pre-activation\n", | |
"    z2 = a1 @ theta1.T\n", | |
"    # hidden layer activation, again with a bias column prepended\n", | |
"    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)\n", | |
"    # output layer pre-activation\n", | |
"    z3 = a2 @ theta2.T\n", | |
"    # hypothesis\n", | |
"    h = sigmoid(z3)\n", | |
"    return a1, z2, a2, z3, h\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):\n", | |
"    \"\"\"Regularized cost and gradient of the 3-layer network for a flat weight vector.\n", | |
"\n", | |
"    Based on code by jdwett (credits in repository).\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    params : 1-D array holding theta1, reshaped to (hidden_size, input_size + 1),\n", | |
"        followed by theta2, reshaped to (num_labels, hidden_size + 1).\n", | |
"    input_size, hidden_size, num_labels : layer sizes used for that reshape.\n", | |
"    X : inputs, one example per row.\n", | |
"    y : one-hot labels, one example per row.\n", | |
"    learning_rate : despite its name this is the regularization strength\n", | |
"        (lambda); it scales the L2 penalty in both the cost and the gradient.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    (Jcost, grad) : the scalar cost and the gradient as a flat array laid out\n", | |
"    like params.\n", | |
"    \"\"\"\n", | |
"    m = X.shape[0]\n", | |
"    # shape input and output as matrices for cost calculation\n", | |
"    X = np.matrix(X)\n", | |
"    y = np.matrix(y)\n", | |
"\n", | |
"    # reshape the parameter array (initial theta) into parameter matrices for each layer\n", | |
"    n_theta1 = hidden_size * (input_size + 1)\n", | |
"    theta1 = np.matrix(np.reshape(params[:n_theta1], (hidden_size, (input_size + 1))))\n", | |
"    theta2 = np.matrix(np.reshape(params[n_theta1:], (num_labels, (hidden_size + 1))))\n", | |
"\n", | |
"    # feed-forward results for all examples\n", | |
"    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)\n", | |
"\n", | |
"    # cross-entropy cost (the log formula), averaged over the examples\n", | |
"    Jcost = np.sum(np.multiply(-y, np.log(h)) - np.multiply((1 - y), np.log(1 - h))) / m\n", | |
"    # L2 penalty over every non-bias weight. The gradient below already carries\n", | |
"    # this term, so the cost must include it as well; otherwise the optimizer\n", | |
"    # is handed a gradient that does not belong to the function it minimizes.\n", | |
"    Reg = learning_rate * (np.sum(np.square(theta1[:, 1:])) + np.sum(np.square(theta2[:, 1:]))) / (2 * m)\n", | |
"    Jcost = Jcost + Reg\n", | |
"\n", | |
"    # back propagation for all examples at once (one row per example)\n", | |
"    # output layer delta term\n", | |
"    d3 = h - y\n", | |
"    # hidden layer delta term; the bias column of theta2 is dropped because\n", | |
"    # no error is propagated back through the bias unit\n", | |
"    d2 = np.multiply(d3 @ theta2[:, 1:], sigmoid_gradient(z2))\n", | |
"\n", | |
"    # accumulated gradients, averaged over the examples (step 4 in the pdf)\n", | |
"    delta1 = (d2.T @ a1) / m\n", | |
"    delta2 = (d3.T @ a2) / m\n", | |
"\n", | |
"    # add the gradient regularization term (excluding the bias)\n", | |
"    delta1[:, 1:] = delta1[:, 1:] + (theta1[:, 1:] * learning_rate) / m\n", | |
"    delta2[:, 1:] = delta2[:, 1:] + (theta2[:, 1:] * learning_rate) / m\n", | |
"\n", | |
"    # unroll both gradient matrices into one long array, in the same order as params\n", | |
"    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))\n", | |
"\n", | |
"    return Jcost, grad" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sigmoid gradient used by backpropagation (section 2.1 in exercise 4)\n", | |
"\n", | |
"\n", | |
"def sigmoid_gradient(z):\n", | |
"    \"\"\"Return the element-wise derivative of the sigmoid at z: g(z) * (1 - g(z)).\"\"\"\n", | |
"    activation = sigmoid(z)\n", | |
"    return np.multiply(activation, (1 - activation))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"6.990485579690806\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"(10285,)\n", | |
" fun: 3.1728815270801514\n", | |
" jac: array([ 3.54647298e-03, 5.85148554e-06, -1.52412688e-05, ...,\n", | |
" -1.84052240e-02, -4.10349473e-03, -1.48495324e-03])\n", | |
" message: 'Max. number of function evaluations reached'\n", | |
" nfev: 10\n", | |
" nit: 3\n", | |
" status: 3\n", | |
" success: False\n", | |
" x: array([-0.03468587, 0.02925743, -0.07620634, ..., -0.0982877 ,\n", | |
" -0.16254782, -0.24348178])\n" | |
] | |
} | |
], | |
"source": [ | |
"\n", | |
"# defining network parameters\n", | |
"\n", | |
"# 20 by 20 pixel images reshaped to arrays of size 400\n", | |
"input_size = 400\n", | |
"# hidden layer size\n", | |
"hidden_size = 25\n", | |
"# output layer size\n", | |
"num_labels = 10\n", | |
"# Named learning_rate, but backprop uses it as the regularization strength (lambda)\n", | |
"learning_rate = 1\n", | |
"\n", | |
"# Fix the seed so the random initial weights, and every number derived from them, are reproducible\n", | |
"np.random.seed(0)\n", | |
"\n", | |
"# Creating arbitrary weights according to initialization guidelines: uniform in [-0.125, 0.125)\n", | |
"params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25\n", | |
"\n", | |
"# cost and gradient at the initial weights, as a sanity check before optimizing\n", | |
"result, gradient = backprop(params, input_size, hidden_size, num_labels, X, y_encoder, learning_rate)\n", | |
"\n", | |
"print (result)\n", | |
"print (4 * \"\\n\")\n", | |
"print(gradient.shape)\n", | |
"\n", | |
"# using scipy for minimizing the cost function; jac=True because backprop returns (cost, gradient)\n", | |
"# NOTE(review): newer SciPy releases name the TNC evaluation limit `maxfun` - confirm against the installed version\n", | |
"fmin = minimize(fun=backprop, x0=params, args=(input_size, hidden_size, num_labels, X, y_encoder, learning_rate),\n", | |
"                method='TNC', jac=True, options={'maxiter': 10})\n", | |
"print(fmin)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[10],\n", | |
" [10],\n", | |
" [ 3],\n", | |
" ...,\n", | |
" [ 3],\n", | |
" [ 9],\n", | |
" [ 2]], dtype=int64)" | |
] | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# forward_propagate multiplies with `*`, which is a matrix product only for np.matrix operands\n", | |
"X = np.matrix(X)\n", | |
"# Unroll the optimized flat weight vector into the two layer matrices (same layout backprop uses).\n", | |
"# Note that this replaces the theta1/theta2 loaded from ex4weights.mat.\n", | |
"theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))\n", | |
"theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))\n", | |
"\n", | |
"a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)\n", | |
"# argmax gives a 0-based column index; the +1 presumably maps it onto labels 1..10 - TODO confirm against y\n", | |
"y_pred = np.array(np.argmax(h, axis=1) + 1)\n", | |
"y_pred" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"accuracy = 43.86%\n" | |
] | |
} | |
], | |
"source": [ | |
"# Compare each predicted label with the true one, row by row: 1 for a hit, 0 for a miss.\n", | |
"correct = [int(bool(pred == label)) for (pred, label) in zip(y_pred, y)]\n", | |
"accuracy = sum(correct) / float(len(correct))\n", | |
"print ('accuracy = {0}%'.format(accuracy * 100))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Quite bad, but there's no regularization in this implementation, and the optimizer above was stopped after only 10 function evaluations.<br>\n", | |
"I will have to get back to it at some point." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python (myenv)", | |
"language": "python", | |
"name": "myenv" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment