Skip to content

Instantly share code, notes, and snippets.

@Z30G0D
Created March 13, 2018 09:10
Show Gist options
  • Save Z30G0D/a9065d3277c1d70c62aa98c8513d4dbd to your computer and use it in GitHub Desktop.
Save Z30G0D/a9065d3277c1d70c62aa98c8513d4dbd to your computer and use it in GitHub Desktop.
The 4th exercise from the machine learning course by Andrew Ng, including feed forward
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hey all!\n",
"## Coursera's Machine Learning course- Exercise 4\n",
"<br>\n",
"This is the 4th exercise of the course, which covers building neural networks from scratch.<br>\n",
"As always, please send your remarks to my mail: tomer@nahshoh.net"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from scipy.io import loadmat\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from scipy.optimize import minimize"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's load the data:"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Read the training set: 'X' holds the examples, 'y' the matching labels.\n",
"data = loadmat('ex4data1.mat')\n",
"X, y = data['X'], data['y']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Turn the y vector (digits 0-9, stored as labels 1-10) into a one-hot encoding (a 10-element vector with a '1' in the position of the label)."
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4;\n",
"# try the new keyword first and fall back to the old one on older releases.\n",
"try:\n",
"    encoder = OneHotEncoder(sparse_output=False)\n",
"except TypeError:\n",
"    encoder = OneHotEncoder(sparse=False)\n",
"# dense (m, number_of_labels) matrix with a single 1 per row\n",
"y_encoder = encoder.fit_transform(y)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((5000, 400), (5000, 10))"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(X.shape, y_encoder.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"OK, so we have 5000 pictures of 20x20 pixels each and their corresponding label vectors.<br>\n",
"Our network will have 3 layers (an input layer, a hidden layer and an output layer).<br>\n",
"Let's load the weights that have already been trained."
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Load the already-trained weight matrices, one per layer transition.\n",
"theta = loadmat('ex4weights.mat')\n",
"theta1, theta2 = theta['Theta1'], theta['Theta2']"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# Activation function used by the cost function described in ex4.pdf; the code used here\n",
"# is attached to the original zip file provided by Coursera.\n",
"\n",
"def sigmoid(z):\n",
"    \"\"\"Return the logistic function 1 / (1 + e^-z), evaluated element-wise by NumPy.\"\"\"\n",
"    denominator = 1 + np.exp(-z)\n",
"    return 1 / denominator"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"def forward_propagate(X, theta1, theta2):\n",
"    \"\"\"Run a forward pass through the input -> hidden -> output network.\n",
"\n",
"    Matrix products are written with ``np.dot`` so the function behaves the\n",
"    same for plain ``ndarray`` inputs as for ``np.matrix`` inputs (with ``*``\n",
"    an ndarray input would be multiplied element-wise instead).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X : 2-D array or matrix of examples, one per row, without a bias column.\n",
"    theta1 : hidden-layer weights; the first column multiplies the bias unit.\n",
"    theta2 : output-layer weights; the first column multiplies the bias unit.\n",
"\n",
"    Returns\n",
"    -------\n",
"    a1, z2, a2, z3, h : the input with bias column, the hidden pre-activations,\n",
"        the hidden activations with bias column, the output pre-activations\n",
"        and the network output ``sigmoid(z3)``.\n",
"    \"\"\"\n",
"    # number of examples\n",
"    m = X.shape[0]\n",
"    # a1 is the network input with a leading column of ones (bias unit)\n",
"    a1 = np.insert(X, 0, values=np.ones(m), axis=1)\n",
"    # weighted input of the hidden layer\n",
"    z2 = np.dot(a1, theta1.T)\n",
"    # hidden activations, again with a bias column prepended\n",
"    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)\n",
"    # weighted input of the output layer\n",
"    z3 = np.dot(a2, theta2.T)\n",
"    # hypothesis: one row of label scores per example\n",
"    h = sigmoid(z3)\n",
"    return a1, z2, a2, z3, h\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):\n",
"    \"\"\"Return the regularized cost of the network and its gradient.\n",
"\n",
"    The approach of working on one flat parameter vector is taken from\n",
"    different code by jdwett (credits in repository).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    params : 1-D array holding theta1 followed by theta2, both flattened.\n",
"    input_size, hidden_size, num_labels : layer sizes, bias units excluded.\n",
"    X : examples, one per row.\n",
"    y : one-hot encoded labels, one row per example.\n",
"    learning_rate : despite its name this is the regularization strength\n",
"        (lambda); it scales the weight penalty in both cost and gradient.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Jcost : cross-entropy cost plus the L2 penalty on the non-bias weights.\n",
"    grad : 1-D array with the gradient, laid out like ``params``.\n",
"    \"\"\"\n",
"    m = X.shape[0]\n",
"    X = np.matrix(X)\n",
"\n",
"    # reshape the flat parameter array into one weight matrix per layer\n",
"    split = hidden_size * (input_size + 1)\n",
"    theta1 = np.matrix(np.reshape(params[:split], (hidden_size, (input_size + 1))))\n",
"    theta2 = np.matrix(np.reshape(params[split:], (num_labels, (hidden_size + 1))))\n",
"\n",
"    # forward_propagate is called with np.matrix inputs, exactly as before; its\n",
"    # results are converted to plain arrays so `*` below is always element-wise\n",
"    a1, z2, a2, z3, h = (np.asarray(v) for v in forward_propagate(X, theta1, theta2))\n",
"    y = np.asarray(y)\n",
"    theta1 = np.asarray(theta1)\n",
"    theta2 = np.asarray(theta2)\n",
"\n",
"    # cross-entropy cost summed over all examples and labels\n",
"    Jcost = np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / m\n",
"    # L2 penalty; the bias columns are excluded so the cost matches the gradient below\n",
"    penalty = np.sum(np.square(theta1[:, 1:])) + np.sum(np.square(theta2[:, 1:]))\n",
"    Jcost = Jcost + learning_rate * penalty / (2 * m)\n",
"\n",
"    # back propagation for all examples at once\n",
"    # output layer delta term, one row per example\n",
"    d3 = h - y\n",
"    # hidden layer delta term; the bias column of theta2 carries no error back\n",
"    d2 = np.dot(d3, theta2[:, 1:]) * sigmoid_gradient(z2)\n",
"    # accumulate over the examples and average (step 4 of backpropagation in the pdf)\n",
"    delta1 = np.dot(d2.T, a1) / m\n",
"    delta2 = np.dot(d3.T, a2) / m\n",
"\n",
"    # add the gradient regularization term (excluding the bias)\n",
"    delta1[:, 1:] += theta1[:, 1:] * learning_rate / m\n",
"    delta2[:, 1:] += theta2[:, 1:] * learning_rate / m\n",
"\n",
"    # one long array with all the gradients, in the same order as params\n",
"    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))\n",
"\n",
"    return Jcost, grad"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# derivative of the sigmoid, needed by backpropagation (section 2.1 in exercise 4)\n",
"\n",
"\n",
"def sigmoid_gradient(z):\n",
"    \"\"\"Return g'(z) = g(z) * (1 - g(z)) element-wise, where g is the sigmoid.\"\"\"\n",
"    activation = sigmoid(z)\n",
"    return np.multiply(activation, 1 - activation)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6.990485579690806\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"(10285,)\n",
" fun: 3.1728815270801514\n",
" jac: array([ 3.54647298e-03, 5.85148554e-06, -1.52412688e-05, ...,\n",
" -1.84052240e-02, -4.10349473e-03, -1.48495324e-03])\n",
" message: 'Max. number of function evaluations reached'\n",
" nfev: 10\n",
" nit: 3\n",
" status: 3\n",
" success: False\n",
" x: array([-0.03468587, 0.02925743, -0.07620634, ..., -0.0982877 ,\n",
" -0.16254782, -0.24348178])\n"
]
}
],
"source": [
"\n",
"# defining network parameters\n",
"\n",
"# 20 by 20 pixel images reshaped to a 400-element array\n",
"input_size = 400\n",
"# hidden layer size\n",
"hidden_size = 25\n",
"# output layer size\n",
"num_labels = 10\n",
"# regularization strength (lambda); the name matches backprop's parameter\n",
"learning_rate = 1\n",
"\n",
"# seed the generator so that a restart reproduces the same initial weights\n",
"np.random.seed(0)\n",
"# small random initial weights, uniform in [-0.125, 0.125)\n",
"params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25\n",
"\n",
"result, gradient = backprop(params, input_size, hidden_size, num_labels, X, y_encoder, learning_rate)\n",
"\n",
"print(result)\n",
"print(4 * \"\\n\")\n",
"print(gradient.shape)\n",
"\n",
"# using scipy for minimizing the cost function; a budget of 10 stopped the\n",
"# optimizer long before convergence (success: False), so allow more work\n",
"fmin = minimize(fun=backprop, x0=params, args=(input_size, hidden_size, num_labels, X, y_encoder, learning_rate),\n",
"                method='TNC', jac=True, options={'maxiter': 250})\n",
"print(fmin)\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[10],\n",
" [10],\n",
" [ 3],\n",
" ...,\n",
" [ 3],\n",
" [ 9],\n",
" [ 2]], dtype=int64)"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# rebuild the two weight matrices from the optimizer's flat solution vector\n",
"X = np.matrix(X)\n",
"theta1 = np.matrix(fmin.x[:hidden_size * (input_size + 1)].reshape(hidden_size, input_size + 1))\n",
"theta2 = np.matrix(fmin.x[hidden_size * (input_size + 1):].reshape(num_labels, hidden_size + 1))\n",
"\n",
"a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)\n",
"# column index of the strongest output, shifted by one to get the label\n",
"y_pred = np.asarray(h.argmax(axis=1)) + 1\n",
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy = 43.86%\n"
]
}
],
"source": [
"# compare predictions and labels element-wise; both are flattened to 1-D first\n",
"correct = np.ravel(y_pred) == np.ravel(y)\n",
"# fraction of matching labels, as a plain Python float\n",
"accuracy = float(np.mean(correct))\n",
"print ('accuracy = {0}%'.format(accuracy * 100))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Quite bad - the recorded run was stopped after only 10 function evaluations, and its cost function did not include the regularization term.<br>\n",
"I will have to get back to it at some point."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (myenv)",
"language": "python",
"name": "myenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment