Created
January 31, 2020 20:05
-
-
Save say4n/e09ffd514ce67bc01513978198fec36c to your computer and use it in GitHub Desktop.
Hacker's guide to Neural Networks in Python!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Backpropagation Algorithm\n", | |
"Python implementation of Karpathy's \"[Hacker's guide to Neural Networks](https://karpathy.github.io/neuralnets/)\"." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Objective" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"-6" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def forwardMultiplyGate(x, y):\n", | |
" return x * y\n", | |
"\n", | |
"forwardMultiplyGate(-2, 3)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Random Local Search" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Best: -5.953757682116292; x: -1.9907345593023322, y: 2.990734075668446\n" | |
] | |
} | |
], | |
"source": [ | |
"import random\n", | |
"\n", | |
"x, y = -2, 3\n", | |
"\n", | |
"delta = 0.01\n", | |
"best = float('-inf')\n", | |
"best_x, best_y = x, y\n", | |
"\n", | |
"\n", | |
"for iteration in range(100):\n", | |
" new_x = x + delta * (random.random() * 2 - 1)\n", | |
" new_y = y + delta * (random.random() * 2 - 1)\n", | |
" \n", | |
" out = forwardMultiplyGate(new_x, new_y)\n", | |
" \n", | |
" if out > best:\n", | |
" best = out\n", | |
" best_x, best_y = new_x, new_y\n", | |
"\n", | |
"print(f\"Best: {best}; x: {best_x}, y: {best_y}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Numerical Gradient" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Best: -4.811235007946064; x: -1.7094225189551153, y: 2.8145382166176987\n" | |
] | |
} | |
], | |
"source": [ | |
"step_size = 0.001\n", | |
"epsilon = 0.000001\n", | |
"\n", | |
"x, y = -2, 3\n", | |
"\n", | |
"for iteration in range(100):\n", | |
" xpe = x + epsilon\n", | |
" x_derivative = (forwardMultiplyGate(xpe, y) - forwardMultiplyGate(x, y))/epsilon\n", | |
" \n", | |
" ype = y + epsilon\n", | |
" y_derivative = (forwardMultiplyGate(x, ype) - forwardMultiplyGate(x, y))/epsilon\n", | |
" \n", | |
" x = x + step_size * x_derivative\n", | |
" y = y + step_size * y_derivative\n", | |
" \n", | |
"print(f\"Best: {forwardMultiplyGate(x, y)}; x: {x}, y: {y}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Analytic Gradient" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Best: -4.811235007904252; x: -1.7094225189238874, y: 2.8145382166446553\n" | |
] | |
} | |
], | |
"source": [ | |
"step_size = 0.001\n", | |
"\n", | |
"x, y = -2, 3\n", | |
"\n", | |
"for iteration in range(100):\n", | |
" x_derivative = y\n", | |
" y_derivative = x\n", | |
"\n", | |
" x = x + step_size * x_derivative\n", | |
" y = y + step_size * y_derivative\n", | |
" \n", | |
"print(f\"Best: {forwardMultiplyGate(x, y)}; x: {x}, y: {y}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Two \"layers\" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"-12" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def forwardAddGate(x, y):\n", | |
" return x + y\n", | |
"\n", | |
"def forwardCircuit(x, y, z):\n", | |
" return forwardMultiplyGate(forwardAddGate(x, y), z)\n", | |
" \n", | |
"x, y, z = -2, 5, -4\n", | |
"forwardCircuit(x, y, z)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Backpropagation!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Best: -8.326655101167699; x: -2.386421263453589, y: 4.613578736546414, z: -3.7386916739230753\n" | |
] | |
} | |
], | |
"source": [ | |
"step_size = 0.001\n", | |
"\n", | |
"x, y, z = -2, 5, -4\n", | |
"\n", | |
"\n", | |
"for iteration in range(100):\n", | |
" q = forwardAddGate(x, y)\n", | |
" f = forwardMultiplyGate(q, z)\n", | |
"\n", | |
" df_dq = z\n", | |
" df_dz = q\n", | |
"\n", | |
" dq_dx = 1\n", | |
" dq_dy = 1\n", | |
"\n", | |
" df_dx = df_dq * dq_dx\n", | |
" df_dy = df_dq * dq_dy\n", | |
" \n", | |
" gradients = [df_dx, df_dy, df_dz]\n", | |
" \n", | |
" x = x + step_size * df_dx\n", | |
" y = y + step_size * df_dy\n", | |
" z = z + step_size * df_dz\n", | |
" \n", | |
"print(f\"Best: {forwardCircuit(x, y, z)}; x: {x}, y: {y}, z: {z}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## SVM\n", | |
"\n", | |
"$$f(x,y) = ax + by + c$$\n", | |
"\n", | |
"Here, $x$ and $y$ are inputs and, $a$, $b$, $c$ are parameters." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class SVM:\n", | |
" def __init__(self, lr=0.01):\n", | |
" self.a = 1\n", | |
" self.da = 0\n", | |
" \n", | |
" self.b = -2\n", | |
" self.db = 0\n", | |
" \n", | |
" self.c = -1\n", | |
" self.dc = 0\n", | |
" \n", | |
" self.lr = lr\n", | |
" \n", | |
" self.output = None\n", | |
" \n", | |
" def __forward(self, x, y):\n", | |
" self.output = self.a * x + self.b * y + self.c \n", | |
" return self.output\n", | |
" \n", | |
" def __backprop(self, label):\n", | |
" pull = 0 \n", | |
" \n", | |
" if label == 1 and self.output < 1:\n", | |
" pull = 1\n", | |
" if label == -1 and self.output > -1:\n", | |
" pull = -1\n", | |
" \n", | |
" # q = ax\n", | |
" # r = by\n", | |
" # s = q + r\n", | |
" # t = s + c\n", | |
" \n", | |
" dt_ds = 1\n", | |
" dt_dc = 1\n", | |
" \n", | |
" ds_dq = 1\n", | |
" ds_dr = 1\n", | |
" \n", | |
" dr_db = y\n", | |
" dq_da = x\n", | |
" \n", | |
" ds_da = ds_dq * dq_da\n", | |
" ds_db = ds_dr * dr_db\n", | |
" \n", | |
" dt_da = pull * dt_ds * ds_da # pull * (1 * x) = pull * x\n", | |
" dt_db = pull * dt_ds * ds_db # pull * (1 * y) = pull * y\n", | |
" \n", | |
" # l1 regularization\n", | |
" self.da = dt_da - self.a # x * pull - a\n", | |
" self.db = dt_db - self.b # y * pull - b\n", | |
" self.dc = pull * dt_dc # 1 * pull\n", | |
" \n", | |
" self.output = None\n", | |
" \n", | |
" def __update(self):\n", | |
" self.a += self.lr * self.da\n", | |
" self.b += self.lr * self.db\n", | |
" self.c += self.lr * self.dc\n", | |
" \n", | |
" def train(self, data, label):\n", | |
" self.__forward(*data)\n", | |
" self.__backprop(label)\n", | |
" self.__update()\n", | |
" \n", | |
" def predict(self, data):\n", | |
" x, y = data\n", | |
" return self.a * x + self.b * y + self.c \n", | |
" \n", | |
" def evaluate(self, dataset, labels):\n", | |
" correct = 0\n", | |
" \n", | |
" for idx in range(len(dataset)):\n", | |
" pred = svm.predict(dataset[idx])\n", | |
" if pred >= 0:\n", | |
" pred = 1\n", | |
" else:\n", | |
" pred = -1\n", | |
" \n", | |
" if labels[idx] == pred:\n", | |
" correct += 1\n", | |
" \n", | |
" return correct/len(dataset)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Iteration: 1, Accuracy: 0.0\n", | |
"Iteration: 26, Accuracy: 0.0\n", | |
"Iteration: 51, Accuracy: 0.0\n", | |
"Iteration: 76, Accuracy: 0.01\n", | |
"Iteration: 101, Accuracy: 0.04\n", | |
"Iteration: 126, Accuracy: 0.03\n", | |
"Iteration: 151, Accuracy: 0.14\n", | |
"Iteration: 176, Accuracy: 0.05\n", | |
"Iteration: 201, Accuracy: 0.09\n", | |
"Iteration: 226, Accuracy: 0.38\n", | |
"Iteration: 251, Accuracy: 0.5\n", | |
"Iteration: 276, Accuracy: 0.93\n", | |
"Iteration: 301, Accuracy: 0.96\n", | |
"Iteration: 326, Accuracy: 0.97\n", | |
"Iteration: 351, Accuracy: 0.97\n", | |
"Iteration: 376, Accuracy: 0.97\n", | |
"Final Accuracy: 0.99\n" | |
] | |
} | |
], | |
"source": [ | |
"import random\n", | |
"from sklearn.datasets import make_blobs\n", | |
"\n", | |
"data, labels = make_blobs(n_samples=100, n_features=2, centers=2)\n", | |
"labels = [-1 if x == 0 else 1 for x in labels]\n", | |
"\n", | |
"svm = SVM(lr=0.01)\n", | |
"\n", | |
"for iteration in range(400):\n", | |
" idx = random.randint(0, len(data)-1)\n", | |
" d, l = data[idx], labels[idx]\n", | |
" svm.train(d, l)\n", | |
" \n", | |
" if iteration % 25 == 0:\n", | |
" acc = svm.evaluate(data, labels)\n", | |
" print(f\"Iteration: {iteration + 1}, Accuracy: {acc}\")\n", | |
"\n", | |
"acc = svm.evaluate(data, labels)\n", | |
"print(f\"Final Accuracy: {acc}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment