say4n/backprop.ipynb

## backprop.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Backpropagation Algorithm\n",
    "Python implementation of Karpathy's \"[Hacker's guide to Neural Networks](https://karpathy.github.io/neuralnets/)\"."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Objective"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-6"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def forwardMultiplyGate(x, y):\n",
    "    \"\"\"Multiply gate: return the product of the inputs x and y.\"\"\"\n",
    "    product = x * y\n",
    "    return product\n",
    "\n",
    "forwardMultiplyGate(-2, 3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Local Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -5.953757682116292; x: -1.9907345593023322, y: 2.990734075668446\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "\n",
    "# Starting point of the search.\n",
    "x, y = -2, 3\n",
    "\n",
    "delta = 0.01  # scale of each random perturbation\n",
    "best = float('-inf')\n",
    "best_x, best_y = x, y\n",
    "\n",
    "\n",
    "def _perturb(value):\n",
    "    \"\"\"Return value plus a uniform random offset in [-delta, delta).\"\"\"\n",
    "    return value + delta * (random.random() * 2 - 1)\n",
    "\n",
    "\n",
    "for iteration in range(100):\n",
    "    # Every candidate is drawn around the fixed starting point (x, y).\n",
    "    new_x = _perturb(x)\n",
    "    new_y = _perturb(y)\n",
    "    out = forwardMultiplyGate(new_x, new_y)\n",
    "\n",
    "    # Remember the candidate with the highest output seen so far.\n",
    "    if best < out:\n",
    "        best, best_x, best_y = out, new_x, new_y\n",
    "\n",
    "print(f\"Best: {best}; x: {best_x}, y: {best_y}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Numerical Gradient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -4.811235007946064; x: -1.7094225189551153, y: 2.8145382166176987\n"
     ]
    }
   ],
   "source": [
    "step_size = 0.001\n",
    "epsilon = 0.000001\n",
    "\n",
    "x, y = -2, 3\n",
    "\n",
    "for iteration in range(100):\n",
    "    # Forward-difference estimate of each partial derivative at (x, y).\n",
    "    base = forwardMultiplyGate(x, y)\n",
    "    xpe = x + epsilon\n",
    "    ype = y + epsilon\n",
    "    x_derivative = (forwardMultiplyGate(xpe, y) - base) / epsilon\n",
    "    y_derivative = (forwardMultiplyGate(x, ype) - base) / epsilon\n",
    "\n",
    "    # Step both inputs along their estimated derivative.\n",
    "    x += step_size * x_derivative\n",
    "    y += step_size * y_derivative\n",
    "\n",
    "print(f\"Best: {forwardMultiplyGate(x, y)}; x: {x}, y: {y}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analytic Gradient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -4.811235007904252; x: -1.7094225189238874, y: 2.8145382166446553\n"
     ]
    }
   ],
   "source": [
    "step_size = 0.001\n",
    "\n",
    "x, y = -2, 3\n",
    "\n",
    "for iteration in range(100):\n",
    "    # For f(x, y) = x * y the partial derivatives are df/dx = y and df/dy = x.\n",
    "    x_derivative, y_derivative = y, x\n",
    "\n",
    "    # Both right-hand sides are evaluated before either input is reassigned.\n",
    "    x, y = x + step_size * x_derivative, y + step_size * y_derivative\n",
    "\n",
    "print(f\"Best: {forwardMultiplyGate(x, y)}; x: {x}, y: {y}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Two \"layers\" "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-12"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def forwardAddGate(x, y):\n",
    "    \"\"\"Add gate: return the sum of the inputs x and y.\"\"\"\n",
    "    total = x + y\n",
    "    return total\n",
    "\n",
    "\n",
    "def forwardCircuit(x, y, z):\n",
    "    \"\"\"Feed the add gate's output for (x, y) and z into the multiply gate.\"\"\"\n",
    "    q = forwardAddGate(x, y)\n",
    "    return forwardMultiplyGate(q, z)\n",
    "\n",
    "\n",
    "x, y, z = -2, 5, -4\n",
    "forwardCircuit(x, y, z)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Backpropagation!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -8.326655101167699; x: -2.386421263453589, y: 4.613578736546414, z: -3.7386916739230753\n"
     ]
    }
   ],
   "source": [
    "step_size = 0.001\n",
    "\n",
    "x, y, z = -2, 5, -4\n",
    "\n",
    "for iteration in range(100):\n",
    "    # Forward pass: q = x + y, then f = q * z.\n",
    "    q = forwardAddGate(x, y)\n",
    "    f = forwardMultiplyGate(q, z)\n",
    "\n",
    "    # Local derivatives of the multiply gate ...\n",
    "    df_dq, df_dz = z, q\n",
    "    # ... and of the add gate.\n",
    "    dq_dx = dq_dy = 1\n",
    "\n",
    "    # Chain rule back to the circuit inputs.\n",
    "    df_dx = df_dq * dq_dx\n",
    "    df_dy = df_dq * dq_dy\n",
    "\n",
    "    gradients = [df_dx, df_dy, df_dz]\n",
    "\n",
    "    # Move every input a small step along its own gradient.\n",
    "    x, y, z = [value + step_size * grad for value, grad in zip((x, y, z), gradients)]\n",
    "\n",
    "print(f\"Best: {forwardCircuit(x, y, z)}; x: {x}, y: {y}, z: {z}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SVM\n",
    "\n",
    "$$f(x,y) = ax + by + c$$\n",
    "\n",
    "Here, $x$ and $y$ are the inputs, and $a$, $b$, and $c$ are the parameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SVM:\n",
    "    \"\"\"Linear classifier f(x, y) = a*x + b*y + c, trained one sample at a time.\n",
    "\n",
    "    Labels are +1 or -1. `train` runs a forward pass, computes the parameter\n",
    "    gradients for that single sample, and applies one update scaled by `lr`.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, lr=0.01):\n",
    "        \"\"\"Set the initial parameters, zero gradients and the learning rate `lr`.\"\"\"\n",
    "        self.a = 1\n",
    "        self.da = 0\n",
    "\n",
    "        self.b = -2\n",
    "        self.db = 0\n",
    "\n",
    "        self.c = -1\n",
    "        self.dc = 0\n",
    "\n",
    "        self.lr = lr\n",
    "\n",
    "        # Inputs and score cached by the forward pass for use in backprop.\n",
    "        self.inputs = None\n",
    "        self.output = None\n",
    "\n",
    "    def _score(self, x, y):\n",
    "        \"\"\"Return the raw score a*x + b*y + c for one point.\"\"\"\n",
    "        return self.a * x + self.b * y + self.c\n",
    "\n",
    "    def __forward(self, x, y):\n",
    "        \"\"\"Score the point (x, y), caching both the inputs and the result.\"\"\"\n",
    "        self.inputs = (x, y)\n",
    "        self.output = self._score(x, y)\n",
    "        return self.output\n",
    "\n",
    "    def __backprop(self, label):\n",
    "        \"\"\"Fill da, db and dc from the cached forward pass and `label` (+1 or -1).\"\"\"\n",
    "        # Use the inputs of the matching forward pass; bare `x` / `y` here would\n",
    "        # silently pick up whatever module-level variables happen to exist.\n",
    "        x, y = self.inputs\n",
    "\n",
    "        pull = 0\n",
    "        if label == 1 and self.output < 1:\n",
    "            pull = 1   # positive sample scored below +1: push the score up\n",
    "        if label == -1 and self.output > -1:\n",
    "            pull = -1  # negative sample scored above -1: push the score down\n",
    "\n",
    "        # Circuit being differentiated:\n",
    "        # q = ax\n",
    "        # r = by\n",
    "        # s = q + r\n",
    "        # t = s + c\n",
    "\n",
    "        dt_ds = 1\n",
    "        dt_dc = 1\n",
    "\n",
    "        ds_dq = 1\n",
    "        ds_dr = 1\n",
    "\n",
    "        dq_da = x\n",
    "        dr_db = y\n",
    "\n",
    "        ds_da = ds_dq * dq_da\n",
    "        ds_db = ds_dr * dr_db\n",
    "\n",
    "        dt_da = pull * dt_ds * ds_da  # pull * (1 * x) = pull * x\n",
    "        dt_db = pull * dt_ds * ds_db  # pull * (1 * y) = pull * y\n",
    "\n",
    "        # Regularization: pull a and b towards zero in proportion to their own\n",
    "        # value (an L2-style penalty); the bias c is not regularized.\n",
    "        self.da = dt_da - self.a  # x * pull - a\n",
    "        self.db = dt_db - self.b  # y * pull - b\n",
    "        self.dc = pull * dt_dc    # 1 * pull\n",
    "\n",
    "        # Clear the cache so a stale forward pass is never reused.\n",
    "        self.inputs = None\n",
    "        self.output = None\n",
    "\n",
    "    def __update(self):\n",
    "        \"\"\"Add lr times the stored gradient to each parameter.\"\"\"\n",
    "        self.a += self.lr * self.da\n",
    "        self.b += self.lr * self.db\n",
    "        self.c += self.lr * self.dc\n",
    "\n",
    "    def train(self, data, label):\n",
    "        \"\"\"Run one forward/backward/update step on the pair `data` = (x, y).\"\"\"\n",
    "        self.__forward(*data)\n",
    "        self.__backprop(label)\n",
    "        self.__update()\n",
    "\n",
    "    def predict(self, data):\n",
    "        \"\"\"Return the raw (unthresholded) score for the pair `data` = (x, y).\"\"\"\n",
    "        x, y = data\n",
    "        return self._score(x, y)\n",
    "\n",
    "    def evaluate(self, dataset, labels):\n",
    "        \"\"\"Return the fraction of points whose predicted sign equals the label.\n",
    "\n",
    "        A score >= 0 is classified as +1, anything else as -1. An empty\n",
    "        `dataset` yields 0.0 instead of dividing by zero.\n",
    "        \"\"\"\n",
    "        if len(dataset) == 0:\n",
    "            return 0.0\n",
    "\n",
    "        correct = 0\n",
    "        for idx, point in enumerate(dataset):\n",
    "            # Score with this instance, not with a global named `svm`.\n",
    "            pred = 1 if self.predict(point) >= 0 else -1\n",
    "            if labels[idx] == pred:\n",
    "                correct += 1\n",
    "\n",
    "        return correct / len(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 1, Accuracy: 0.0\n",
      "Iteration: 26, Accuracy: 0.0\n",
      "Iteration: 51, Accuracy: 0.0\n",
      "Iteration: 76, Accuracy: 0.01\n",
      "Iteration: 101, Accuracy: 0.04\n",
      "Iteration: 126, Accuracy: 0.03\n",
      "Iteration: 151, Accuracy: 0.14\n",
      "Iteration: 176, Accuracy: 0.05\n",
      "Iteration: 201, Accuracy: 0.09\n",
      "Iteration: 226, Accuracy: 0.38\n",
      "Iteration: 251, Accuracy: 0.5\n",
      "Iteration: 276, Accuracy: 0.93\n",
      "Iteration: 301, Accuracy: 0.96\n",
      "Iteration: 326, Accuracy: 0.97\n",
      "Iteration: 351, Accuracy: 0.97\n",
      "Iteration: 376, Accuracy: 0.97\n",
      "Final Accuracy: 0.99\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "from sklearn.datasets import make_blobs\n",
    "\n",
    "# 100 two-feature points in 2 clusters; class 0 becomes -1, anything else +1.\n",
    "data, labels = make_blobs(n_samples=100, n_features=2, centers=2)\n",
    "labels = [1 if cls != 0 else -1 for cls in labels]\n",
    "\n",
    "svm = SVM(lr=0.01)\n",
    "\n",
    "for iteration in range(400):\n",
    "    # Train on one randomly chosen sample.\n",
    "    idx = random.randint(0, len(data)-1)\n",
    "    sample, target = data[idx], labels[idx]\n",
    "    svm.train(sample, target)\n",
    "\n",
    "    # Report accuracy on the whole data set every 25 iterations.\n",
    "    if not iteration % 25:\n",
    "        acc = svm.evaluate(data, labels)\n",
    "        print(f\"Iteration: {iteration + 1}, Accuracy: {acc}\")\n",
    "\n",
    "acc = svm.evaluate(data, labels)\n",
    "print(f\"Final Accuracy: {acc}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Backpropagation Algorithm\n",
	"Python implementation of Karpathy's \"[Hacker's guide to Neural Networks](https://karpathy.github.io/neuralnets/)\"."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Objective"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"-6"
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"def forwardMultiplyGate(x, y):\n",
	"    \"\"\"Multiply gate: return the product of the inputs x and y.\"\"\"\n",
	"    product = x * y\n",
	"    return product\n",
	"\n",
	"forwardMultiplyGate(-2, 3)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Random Local Search"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Best: -5.953757682116292; x: -1.9907345593023322, y: 2.990734075668446\n"
	]
	}
	],
	"source": [
	"import random\n",
	"\n",
	"# Starting point of the search.\n",
	"x, y = -2, 3\n",
	"\n",
	"delta = 0.01  # scale of each random perturbation\n",
	"best = float('-inf')\n",
	"best_x, best_y = x, y\n",
	"\n",
	"\n",
	"def _perturb(value):\n",
	"    \"\"\"Return value plus a uniform random offset in [-delta, delta).\"\"\"\n",
	"    return value + delta * (random.random() * 2 - 1)\n",
	"\n",
	"\n",
	"for iteration in range(100):\n",
	"    # Every candidate is drawn around the fixed starting point (x, y).\n",
	"    new_x = _perturb(x)\n",
	"    new_y = _perturb(y)\n",
	"    out = forwardMultiplyGate(new_x, new_y)\n",
	"\n",
	"    # Remember the candidate with the highest output seen so far.\n",
	"    # (The body of this `if` must be indented one level deeper than the loop.)\n",
	"    if best < out:\n",
	"        best, best_x, best_y = out, new_x, new_y\n",
	"\n",
	"print(f\"Best: {best}; x: {best_x}, y: {best_y}\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Numerical Gradient"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Best: -4.811235007946064; x: -1.7094225189551153, y: 2.8145382166176987\n"
	]
	}
	],
	"source": [
	"step_size = 0.001\n",
	"epsilon = 0.000001\n",
	"\n",
	"x, y = -2, 3\n",
	"\n",
	"for iteration in range(100):\n",
	"    # Forward-difference estimate of each partial derivative at (x, y).\n",
	"    base = forwardMultiplyGate(x, y)\n",
	"    xpe = x + epsilon\n",
	"    ype = y + epsilon\n",
	"    x_derivative = (forwardMultiplyGate(xpe, y) - base) / epsilon\n",
	"    y_derivative = (forwardMultiplyGate(x, ype) - base) / epsilon\n",
	"\n",
	"    # Step both inputs along their estimated derivative.\n",
	"    x += step_size * x_derivative\n",
	"    y += step_size * y_derivative\n",
	"\n",
	"print(f\"Best: {forwardMultiplyGate(x, y)}; x: {x}, y: {y}\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Analytic Gradient"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Best: -4.811235007904252; x: -1.7094225189238874, y: 2.8145382166446553\n"
	]
	}
	],
	"source": [
	"step_size = 0.001\n",
	"\n",
	"x, y = -2, 3\n",
	"\n",
	"for iteration in range(100):\n",
	"    # For f(x, y) = x * y the partial derivatives are df/dx = y and df/dy = x.\n",
	"    x_derivative, y_derivative = y, x\n",
	"\n",
	"    # Both right-hand sides are evaluated before either input is reassigned.\n",
	"    x, y = x + step_size * x_derivative, y + step_size * y_derivative\n",
	"\n",
	"print(f\"Best: {forwardMultiplyGate(x, y)}; x: {x}, y: {y}\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Two \"layers\" "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"-12"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"def forwardAddGate(x, y):\n",
	"    \"\"\"Add gate: return the sum of the inputs x and y.\"\"\"\n",
	"    total = x + y\n",
	"    return total\n",
	"\n",
	"\n",
	"def forwardCircuit(x, y, z):\n",
	"    \"\"\"Feed the add gate's output for (x, y) and z into the multiply gate.\"\"\"\n",
	"    q = forwardAddGate(x, y)\n",
	"    return forwardMultiplyGate(q, z)\n",
	"\n",
	"\n",
	"x, y, z = -2, 5, -4\n",
	"forwardCircuit(x, y, z)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Backpropagation!"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Best: -8.326655101167699; x: -2.386421263453589, y: 4.613578736546414, z: -3.7386916739230753\n"
	]
	}
	],
	"source": [
	"step_size = 0.001\n",
	"\n",
	"x, y, z = -2, 5, -4\n",
	"\n",
	"for iteration in range(100):\n",
	"    # Forward pass: q = x + y, then f = q * z.\n",
	"    q = forwardAddGate(x, y)\n",
	"    f = forwardMultiplyGate(q, z)\n",
	"\n",
	"    # Local derivatives of the multiply gate ...\n",
	"    df_dq, df_dz = z, q\n",
	"    # ... and of the add gate.\n",
	"    dq_dx = dq_dy = 1\n",
	"\n",
	"    # Chain rule back to the circuit inputs.\n",
	"    df_dx = df_dq * dq_dx\n",
	"    df_dy = df_dq * dq_dy\n",
	"\n",
	"    gradients = [df_dx, df_dy, df_dz]\n",
	"\n",
	"    # Move every input a small step along its own gradient.\n",
	"    x, y, z = [value + step_size * grad for value, grad in zip((x, y, z), gradients)]\n",
	"\n",
	"print(f\"Best: {forwardCircuit(x, y, z)}; x: {x}, y: {y}, z: {z}\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## SVM\n",
	"\n",
	"$$f(x,y) = ax + by + c$$\n",
	"\n",
	"Here, $x$ and $y$ are the inputs, and $a$, $b$, and $c$ are the parameters."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"class SVM:\n",
	"    \"\"\"Linear classifier f(x, y) = a*x + b*y + c, trained one sample at a time.\n",
	"\n",
	"    Labels are +1 or -1. `train` runs a forward pass, computes the parameter\n",
	"    gradients for that single sample, and applies one update scaled by `lr`.\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, lr=0.01):\n",
	"        \"\"\"Set the initial parameters, zero gradients and the learning rate `lr`.\"\"\"\n",
	"        self.a = 1\n",
	"        self.da = 0\n",
	"\n",
	"        self.b = -2\n",
	"        self.db = 0\n",
	"\n",
	"        self.c = -1\n",
	"        self.dc = 0\n",
	"\n",
	"        self.lr = lr\n",
	"\n",
	"        # Inputs and score cached by the forward pass for use in backprop.\n",
	"        self.inputs = None\n",
	"        self.output = None\n",
	"\n",
	"    def _score(self, x, y):\n",
	"        \"\"\"Return the raw score a*x + b*y + c for one point.\"\"\"\n",
	"        return self.a * x + self.b * y + self.c\n",
	"\n",
	"    def __forward(self, x, y):\n",
	"        \"\"\"Score the point (x, y), caching both the inputs and the result.\"\"\"\n",
	"        self.inputs = (x, y)\n",
	"        self.output = self._score(x, y)\n",
	"        return self.output\n",
	"\n",
	"    def __backprop(self, label):\n",
	"        \"\"\"Fill da, db and dc from the cached forward pass and `label` (+1 or -1).\"\"\"\n",
	"        # Use the inputs of the matching forward pass; bare `x` / `y` here would\n",
	"        # silently pick up whatever module-level variables happen to exist.\n",
	"        x, y = self.inputs\n",
	"\n",
	"        pull = 0\n",
	"        if label == 1 and self.output < 1:\n",
	"            pull = 1   # positive sample scored below +1: push the score up\n",
	"        if label == -1 and self.output > -1:\n",
	"            pull = -1  # negative sample scored above -1: push the score down\n",
	"\n",
	"        # Circuit being differentiated:\n",
	"        # q = ax\n",
	"        # r = by\n",
	"        # s = q + r\n",
	"        # t = s + c\n",
	"\n",
	"        dt_ds = 1\n",
	"        dt_dc = 1\n",
	"\n",
	"        ds_dq = 1\n",
	"        ds_dr = 1\n",
	"\n",
	"        dq_da = x\n",
	"        dr_db = y\n",
	"\n",
	"        ds_da = ds_dq * dq_da\n",
	"        ds_db = ds_dr * dr_db\n",
	"\n",
	"        dt_da = pull * dt_ds * ds_da  # pull * (1 * x) = pull * x\n",
	"        dt_db = pull * dt_ds * ds_db  # pull * (1 * y) = pull * y\n",
	"\n",
	"        # Regularization: pull a and b towards zero in proportion to their own\n",
	"        # value (an L2-style penalty); the bias c is not regularized.\n",
	"        self.da = dt_da - self.a  # x * pull - a\n",
	"        self.db = dt_db - self.b  # y * pull - b\n",
	"        self.dc = pull * dt_dc    # 1 * pull\n",
	"\n",
	"        # Clear the cache so a stale forward pass is never reused.\n",
	"        self.inputs = None\n",
	"        self.output = None\n",
	"\n",
	"    def __update(self):\n",
	"        \"\"\"Add lr times the stored gradient to each parameter.\"\"\"\n",
	"        self.a += self.lr * self.da\n",
	"        self.b += self.lr * self.db\n",
	"        self.c += self.lr * self.dc\n",
	"\n",
	"    def train(self, data, label):\n",
	"        \"\"\"Run one forward/backward/update step on the pair `data` = (x, y).\"\"\"\n",
	"        self.__forward(*data)\n",
	"        self.__backprop(label)\n",
	"        self.__update()\n",
	"\n",
	"    def predict(self, data):\n",
	"        \"\"\"Return the raw (unthresholded) score for the pair `data` = (x, y).\"\"\"\n",
	"        x, y = data\n",
	"        return self._score(x, y)\n",
	"\n",
	"    def evaluate(self, dataset, labels):\n",
	"        \"\"\"Return the fraction of points whose predicted sign equals the label.\n",
	"\n",
	"        A score >= 0 is classified as +1, anything else as -1. An empty\n",
	"        `dataset` yields 0.0 instead of dividing by zero.\n",
	"        \"\"\"\n",
	"        if len(dataset) == 0:\n",
	"            return 0.0\n",
	"\n",
	"        correct = 0\n",
	"        for idx, point in enumerate(dataset):\n",
	"            # Score with this instance, not with a global named `svm`.\n",
	"            pred = 1 if self.predict(point) >= 0 else -1\n",
	"            if labels[idx] == pred:\n",
	"                correct += 1\n",
	"\n",
	"        return correct / len(dataset)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Iteration: 1, Accuracy: 0.0\n",
	"Iteration: 26, Accuracy: 0.0\n",
	"Iteration: 51, Accuracy: 0.0\n",
	"Iteration: 76, Accuracy: 0.01\n",
	"Iteration: 101, Accuracy: 0.04\n",
	"Iteration: 126, Accuracy: 0.03\n",
	"Iteration: 151, Accuracy: 0.14\n",
	"Iteration: 176, Accuracy: 0.05\n",
	"Iteration: 201, Accuracy: 0.09\n",
	"Iteration: 226, Accuracy: 0.38\n",
	"Iteration: 251, Accuracy: 0.5\n",
	"Iteration: 276, Accuracy: 0.93\n",
	"Iteration: 301, Accuracy: 0.96\n",
	"Iteration: 326, Accuracy: 0.97\n",
	"Iteration: 351, Accuracy: 0.97\n",
	"Iteration: 376, Accuracy: 0.97\n",
	"Final Accuracy: 0.99\n"
	]
	}
	],
	"source": [
	"import random\n",
	"from sklearn.datasets import make_blobs\n",
	"\n",
	"# 100 two-feature points in 2 clusters; class 0 becomes -1, anything else +1.\n",
	"data, labels = make_blobs(n_samples=100, n_features=2, centers=2)\n",
	"labels = [1 if cls != 0 else -1 for cls in labels]\n",
	"\n",
	"svm = SVM(lr=0.01)\n",
	"\n",
	"for iteration in range(400):\n",
	"    # Train on one randomly chosen sample.\n",
	"    idx = random.randint(0, len(data)-1)\n",
	"    sample, target = data[idx], labels[idx]\n",
	"    svm.train(sample, target)\n",
	"\n",
	"    # Report accuracy on the whole data set every 25 iterations.\n",
	"    # (The body of this `if` must be indented one level deeper than the loop.)\n",
	"    if iteration % 25 == 0:\n",
	"        acc = svm.evaluate(data, labels)\n",
	"        print(f\"Iteration: {iteration + 1}, Accuracy: {acc}\")\n",
	"\n",
	"acc = svm.evaluate(data, labels)\n",
	"print(f\"Final Accuracy: {acc}\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}