@kingychiu
Last active April 22, 2018 16:40
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>\n",
"table {float:left}\n",
"</style>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%html\n",
"<style>\n",
"table {float:left}\n",
"</style>"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
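{
"cell_type": "markdown",
"metadata": {},
"source": [
"The layer weights below are initialised with `np.random.rand`, so every run starts from a different point. As an optional sketch (not used for the recorded outputs), seeding NumPy's RNG makes runs reproducible."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# optional sketch: fix the RNG seed for reproducible weight initialisation\n",
"# (the outputs recorded in this notebook were produced without a seed)\n",
"np.random.seed(42)"
]
},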
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1 Define Training Data\\n\n",
"## A xor B\n",
"\n",
"| A | B | A xor B |\n",
"|:---:|:---:|:--------:| \n",
"| F | F | F |\n",
"| F | T | T |\n",
"| T | F | T |\n",
"| T | T | F |\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 312,
"metadata": {},
"outputs": [],
"source": [
"# Define A XOR B\n",
"\n",
"x1 = np.matrix([[0], [0]])\n",
"x2 = np.matrix([[0], [1]])\n",
"x3 = np.matrix([[1], [0]])\n",
"x4 = np.matrix([[1], [1]])\n",
"\n",
"# T F\n",
"t1 = np.matrix([[0], [1]])\n",
"t2 = np.matrix([[1], [0]])\n",
"t3 = np.matrix([[1], [0]])\n",
"t4 = np.matrix([[0], [1]])"
]
},
{
"cell_type": "code",
"execution_count": 313,
"metadata": {},
"outputs": [],
"source": [
"# sigmoid is one of the most common activation function = \n",
"def sigmoid(v):\n",
" return 1/(1+np.exp(-v))\n",
"\n",
"def d_sigmoid(sigmoided_v):\n",
" # Hadamard Product\n",
" return np.multiply(sigmoided_v, (1 - sigmoided_v))"
]
},
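{
"cell_type": "markdown",
"metadata": {},
"source": [
"Why `d_sigmoid` takes the *already-sigmoided* value: for $\\sigma(v) = \\frac{1}{1 + e^{-v}}$ the derivative can be written purely in terms of the output,\n",
"\n",
"$$\\sigma'(v) = \\sigma(v)\\,(1 - \\sigma(v)),$$\n",
"\n",
"so each layer can reuse its stored activations instead of recomputing $v$."
]
},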
{
"cell_type": "code",
"execution_count": 340,
"metadata": {},
"outputs": [],
"source": [
"class Layer:\n",
" def __init__(self, prev_dimension, output_dimension, activation_function):\n",
" # the number of neurons of this layer\n",
" self.output_dimension = output_dimension\n",
" # the number of neurons of prev. layer\n",
" self.prev_dimension = prev_dimension\n",
" # the weight\n",
" self.weight = np.random.rand(self.prev_dimension, self.output_dimension)\n",
" # the activation function of this layer (f_l)\n",
" self.activation_function = activation_function\n",
" # define the output of this layer (a_l)\n",
" self.output_matrix = np.random.rand(self.output_dimension, 1)\n",
" \n",
" # caluate this layer output a_l by the previous layer output a_l-1\n",
" def feed_forward(self, previous_output):\n",
" # Z = W.T A\n",
" Z = self.weight.transpose().dot(previous_output)\n",
" self.output_matrix = self.activation_function(Z)\n",
" return self.output_matrix\n",
" \n",
" # back_propagate the error\n",
" def update_weight(self, prev_output, next_delta, next_weight=None):\n",
" if next_weight is not None:\n",
" self.delta = np.multiply(d_sigmoid(self.output_matrix), next_weight.dot(next_delta))\n",
" else:\n",
" # output layer\n",
" self.delta = np.multiply(d_sigmoid(self.output_matrix), next_delta)\n",
"\n",
" self.weight = self.weight - prev_output.dot(self.delta.transpose())\n",
" \n",
"class InputLayer(Layer):\n",
" def __init__(self, input_dimension):\n",
" Layer.__init__(self, input_dimension, input_dimension, lambda x: x)\n",
" \n",
"class NeuralNetwork():\n",
"\n",
" def __init__(self, layers):\n",
" self.layers = layers\n",
" # init weights based on layers' dimension (inter connecting weights)\n",
" for l in range(1, len(self.layers)):\n",
" print('W'+str(l), self.layers[l].prev_dimension, 'x',self.layers[l].output_dimension)\n",
" \n",
" def feed_forward(self, input_matrix):\n",
" # handle input layer\n",
" self.layers[0].output_matrix = input_matrix\n",
" # skip the input layer, start from the first hidden layer\n",
" for l in range(1, len(self.layers)):\n",
" self.layers[l].feed_forward(self.layers[l-1].output_matrix)\n",
" return self.layers[-1].output_matrix\n",
" \n",
" def back_propagate(self, y, t):\n",
" \n",
" # Random error function here (0.5 * sq(y-t))\n",
" output_error = y - t\n",
" \n",
" # output layers' delta\n",
" self.layers[-1].update_weight(self.layers[-2].output_matrix, output_error)\n",
" \n",
" # hidden layers' delta\n",
" for l in range(len(self.layers)-2, 0, -1):\n",
" self.layers[l].update_weight(self.layers[l-1].output_matrix, self.layers[l+1].delta, \n",
" self.layers[l+1].weight)\n",
" \n",
" def train(self, training_samples, training_labels, num_epoch):\n",
" for i in range(num_epoch + 1):\n",
" epoch_err = []\n",
" for j in range(len(training_samples)): \n",
" y = self.feed_forward(training_samples[j])\n",
" t = training_labels[j]\n",
" self.back_propagate(y, t)\n",
" sample_mean_sq_err = ((y-t)[0] ** 2).mean()\n",
" epoch_err.append(sample_mean_sq_err)\n",
" if i % 1000 == 0:\n",
" print('epoch', i, 'MSE', np.array(epoch_err).mean())\n",
" \n",
" def predict(self, input_matrix):\n",
" y = self.feed_forward(input_matrix)\n",
" return y"
]
},
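{
"cell_type": "markdown",
"metadata": {},
"source": [
"The update rule implemented above, in matrix form (learning rate fixed at 1):\n",
"\n",
"$$\\delta_L = f'(z_L) \\odot (y - t), \\qquad \\delta_l = f'(z_l) \\odot (W_{l+1} \\delta_{l+1}), \\qquad W_l \\leftarrow W_l - a_{l-1}\\, \\delta_l^T$$\n",
"\n",
"where $\\odot$ is the Hadamard product and $f'(z_l)$ is computed as `d_sigmoid(a_l)`."
]
},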
{
"cell_type": "code",
"execution_count": 341,
"metadata": {},
"outputs": [],
"source": [
"il = InputLayer(2)\n",
"h1 = Layer(2, 3, sigmoid)\n",
"h2 = Layer(3, 3, sigmoid)\n",
"ol = Layer(3, 2, sigmoid)"
]
},
{
"cell_type": "code",
"execution_count": 342,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"W1 2 x 3\n",
"W2 3 x 3\n",
"W3 3 x 2\n"
]
}
],
"source": [
"ann = NeuralNetwork([\n",
" il, h1, h2, ol \n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": 343,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"matrix([[0.79904654],\n",
" [0.65283624]])"
]
},
"execution_count": 343,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ann.feed_forward(x1)\n",
"ann.feed_forward(x2)"
]
},
{
"cell_type": "code",
"execution_count": 344,
"metadata": {},
"outputs": [],
"source": [
"ann.back_propagate(ann.feed_forward(x1), t1)"
]
},
{
"cell_type": "code",
"execution_count": 345,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 0 MSE 0.32581289576740524\n",
"epoch 1000 MSE 0.29897317783822236\n",
"epoch 2000 MSE 0.2989358584352277\n",
"epoch 3000 MSE 0.18703473245270702\n",
"epoch 4000 MSE 0.0012912454992666748\n",
"epoch 5000 MSE 0.0005961645130699109\n",
"epoch 6000 MSE 0.00038254827396957947\n",
"epoch 7000 MSE 0.00027986268393424704\n",
"epoch 8000 MSE 0.00021982233025093738\n",
"epoch 9000 MSE 0.00018055497378757687\n",
"epoch 10000 MSE 0.0001529297270209627\n"
]
}
],
"source": [
"ann.train([x1, x2, x3, x4], [t1, t2, t3, t4], 10000)"
]
},
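{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sanity check (a sketch, not executed in the recorded run): after training, `predict` should map each input close to its one-hot target."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sanity check (sketch, not part of the recorded run):\n",
"# each prediction should be close to its one-hot target after training\n",
"for x, t in zip([x1, x2, x3, x4], [t1, t2, t3, t4]):\n",
"    print(ann.predict(x).round(3).T, 'target', t.T)"
]
},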
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}