Skip to content

Instantly share code, notes, and snippets.

@kodamap
Created April 5, 2020 11:09
Show Gist options
  • Save kodamap/67db4dcd050cae03fb56ff53e28321aa to your computer and use it in GitHub Desktop.
Save kodamap/67db4dcd050cae03fb56ff53e28321aa to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deep Neural Network"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import time\n",
"import joblib\n",
"import os\n",
"from pprint import pprint\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Utils and activation functions"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"#from utils.utils import *\n",
"%reload_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# utils.py\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def plot_history(history_data, learning_rate=0.005):\n",
" plt.figure() \n",
" costs = history_data[0]\n",
" accus = history_data[1]\n",
" plt.plot(costs, label=\"cost\")\n",
" plt.plot(accus, label=\"accuracy\")\n",
" plt.ylabel('cost / accuracy')\n",
" plt.xlabel('iterations (per hundreds)')\n",
" plt.title(\"Learning rate = {}\".format(learning_rate))\n",
" plt.legend()\n",
" plt.show()\n",
" \n",
"def show_digits(X, Y, Y_pred=None, num=10):\n",
" # plot settings\n",
" cols = 10 # display 10 images per rows\n",
" rows = num // cols\n",
" font = {'family': 'DejaVu Sans',\n",
" 'color': 'darkred',\n",
" 'weight': 'normal',\n",
" 'size': 12,\n",
" } \n",
" plt.figure(figsize=(cols, rows + 1)) \n",
" plt.subplots_adjust(top=0.99, bottom=0.1, hspace=0.1, wspace=0.4)\n",
" plt.gray()\n",
" for i in range(num):\n",
" ax = plt.subplot(rows + 1, cols, i + 1)\n",
" if (type(Y_pred).__module__ == 'numpy') and (Y[i] != Y_pred[i]): \n",
" ax.set_title('{} (y:{})'.format(Y_pred[i], Y[i]), fontdict=font)\n",
" else:\n",
" ax.set_title('y:{}'.format(Y[i]))\n",
" ax.axis('off')\n",
" plt.imshow(X[i])\n",
" plt.show()\n",
"\n",
"def flatten_X(X_train,X_test):\n",
" m, w, h = X_train.shape\n",
" X_train = X_train.reshape(m, w * h) / 255.\n",
" m, w, h = X_test.shape\n",
" X_test = X_test.reshape(m, w * h) / 255.\n",
" return X_train, X_test\n",
"\n",
"def to_categorical(y, num_classes=None):\n",
" \"\"\"Converts a class vector (integers) to binary class matrix.\n",
" # ref: https://github.com/keras-team/keras/blob/master/keras/utils/np_utils.py\n",
" \"\"\"\n",
" y = np.array(y, dtype='int')\n",
" if not num_classes:\n",
" num_classes = np.max(y) + 1\n",
" input_shape = y.shape\n",
" categorical = np.zeros((input_shape[0], num_classes))\n",
" categorical[np.arange(input_shape[0]), y] = 1 \n",
" return categorical\n",
"\n",
"\n",
"def f1_score(y_true, y_pred):\n",
" \"\"\"\n",
" datatype of y_true, y_pred must be ndarray.\n",
" \"\"\"\n",
" try:\n",
" assert(type(y_true).__module__ == 'numpy')\n",
" except AssertionError as err:\n",
" y_true = y_true.values\n",
"\n",
" try:\n",
" assert(type(y_pred).__module__ == 'numpy')\n",
" except AssertionError as err:\n",
" y_pred = y_pred.values\n",
" \n",
" true_idx = np.where(y_true == y_pred)\n",
" false_idx = np.where(y_true != y_pred)\n",
"\n",
" TN = np.count_nonzero(y_true[true_idx] == 0)\n",
" TP = np.count_nonzero(y_true[true_idx] == 1)\n",
" # this means false positive (pred = 1)\n",
" FP = np.count_nonzero(y_true[false_idx] == 0)\n",
" # this means false negative (pred = 0)\n",
" FN = np.count_nonzero(y_true[false_idx] == 1)\n",
"\n",
" confusion_matrix = np.array(([TN, FP], [FN, TP]))\n",
" recall = TP / (TP + FN)\n",
" precision = TP / (TP + FP)\n",
" f_score = 2 * (recall * precision) / (recall + precision)\n",
" idxes = (true_idx, false_idx)\n",
" return f_score, idxes, confusion_matrix, recall, precision\n",
"\n",
"def show_confusion_matrix(f1_score, idxes, cm, recall, precision):\n",
" TN, FP, FN, TP = cm[0][0], cm[0][1], cm[1][0], cm[1][1] \n",
" print(\"-\"*40)\n",
" print(\"TN: {:04}, FP: {:04}\".format(TN, FP))\n",
" print(\"FN: {:04}, TP: {:04}\".format(FN, TP))\n",
" print(\"Recall: {:.2f}, Precision: {:.2f}\".format(recall, precision))\n",
" print(\"F1 Score: {:.2f}\".format(f1_score))\n",
" print(\"-\"*40)\n",
" \n",
"def ramdom_mini_batches(X, Y, mini_batch_size = 64, seed = 0):\n",
" \"\"\"expected shape\n",
" X: (m, 784)\n",
" Y: (1, m)\n",
" \"\"\"\n",
" np.random.seed(seed)\n",
" m = X.shape[0]\n",
" mini_batches = []\n",
" permutation = list(np.random.permutation(m))\n",
" shuffled_X = X[permutation]\n",
" if Y.shape[1] == 10:\n",
" shuffled_Y = Y[permutation]\n",
" num_complete_minibatches = m // mini_batch_size\n",
" for i in range(num_complete_minibatches):\n",
" mini_batch_X = shuffled_X[i * mini_batch_size : (i + 1) * mini_batch_size]\n",
" mini_batch_Y = shuffled_Y[i * mini_batch_size : (i + 1) * mini_batch_size]\n",
" mini_batch = (mini_batch_X, mini_batch_Y)\n",
" mini_batches.append(mini_batch)\n",
" \n",
" if m % mini_batch_size != 0:\n",
" mini_batch_X = shuffled_X[num_complete_minibatches * mini_batch_size : m]\n",
" mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size : m]\n",
" mini_batch = (mini_batch_X, mini_batch_Y)\n",
" mini_batches.append(mini_batch)\n",
" else:\n",
" shuffled_Y = Y[:,permutation]\n",
" num_complete_minibatches = m // mini_batch_size\n",
" for i in range(num_complete_minibatches):\n",
" mini_batch_X = shuffled_X[i * mini_batch_size : (i + 1) * mini_batch_size]\n",
" mini_batch_Y = shuffled_Y[:, i * mini_batch_size : (i + 1) * mini_batch_size]\n",
" mini_batch = (mini_batch_X, mini_batch_Y)\n",
" mini_batches.append(mini_batch)\n",
" \n",
" if m % mini_batch_size != 0:\n",
" mini_batch_X = shuffled_X[num_complete_minibatches * mini_batch_size : m]\n",
" mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size : m]\n",
" mini_batch = (mini_batch_X, mini_batch_Y)\n",
" mini_batches.append(mini_batch)\n",
" \n",
" return mini_batches"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# activation functions\n",
"import numpy as np\n",
"import os\n",
"\n",
"def sigmoid(Z):\n",
" return 1 / (1 + np.exp(-Z))\n",
"\n",
"def softmax(Z):\n",
" e_x = np.exp(Z - np.max(Z, axis=1, keepdims=True))\n",
" return e_x / np.sum(e_x, axis=1, keepdims=True)\n",
"\n",
"def relu(Z):\n",
" return np.maximum(0,Z)\n",
"\n",
"def leakyrelu(Z):\n",
" A = Z.copy()\n",
" A[Z <= 0] *= 0.01\n",
" return A"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data observation"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(60000, 28, 28) (10000, 28, 28)\n"
]
}
],
"source": [
"from keras.datasets import mnist\n",
"(X_train, Y_train), (X_test, Y_test) = mnist.load_data()\n",
"print(X_train.shape, X_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x216 with 20 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"show_digits(X_train, Y_train, num=20)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Original\n",
"X_train:(60000, 784) Y_train:(60000,) X_test:(10000, 784) Y_test:(10000,)\n",
"Categorical\n",
"X_train:(60000, 784) Y_train:(60000, 10) X_test:(10000, 784) Y_test:(10000, 10)\n"
]
}
],
"source": [
"(X_train, Y_train), (X_test, Y_test) = mnist.load_data()\n",
"X_train, X_test = flatten_X(X_train, X_test)\n",
"print(\"Original\\nX_train:{} Y_train:{} X_test:{} Y_test:{}\".format(\n",
" X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))\n",
"\n",
"# convert Y to categorical data\n",
"categorical_Y_train = to_categorical(Y_train)\n",
"categorical_Y_test = to_categorical(Y_test)\n",
"print(\"Categorical\\nX_train:{} Y_train:{} X_test:{} Y_test:{}\".format(\n",
" X_train.shape, categorical_Y_train.shape, X_test.shape, categorical_Y_test.shape))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Defining the deep neural network model \n",
"\n",
"This code is based on **Coursera 01_Neural Networks and Deep Learning / week3 / Planar data classification with one hidden layer**."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import time\n",
"import joblib\n",
"import os\n",
"from utils.utils import *\n",
"from utils.activation_function import softmax, sigmoid, relu, leakyrelu\n",
"\n",
"class DeepNN():\n",
"\n",
" def __init__(self):\n",
" self.layers = []\n",
" \n",
" def initialize_parameters(self):\n",
" np.random.seed(2) \n",
" parameters = {}\n",
" for l in range(1, len(self.layers)):\n",
" parameters['W' + str(l)] = np.random.randn(\n",
" self.layers[l-1]['input'], self.layers[l]['input']) * 0.01\n",
" parameters['b' + str(l)] = np.zeros((self.layers[l]['input'],))\n",
" return parameters\n",
"\n",
" def initialize_adam(self, parameters):\n",
" v = {}\n",
" s = {}\n",
" for l in range(1, len(self.layers)):\n",
" v['dW' + str(l)] = np.zeros((parameters['W' + str(l)].shape))\n",
" v['db' + str(l)] = np.zeros((parameters['b' + str(l)].shape))\n",
" s['dW' + str(l)] = np.zeros((parameters['W' + str(l)].shape))\n",
" s['db' + str(l)] = np.zeros((parameters['b' + str(l)].shape))\n",
" return v, s\n",
" \n",
" def initialize_parameters_he(self):\n",
" np.random.seed(2) \n",
" parameters = {}\n",
" for l in range(1, len(self.layers)):\n",
" parameters['W' + str(l)] = np.random.randn(\n",
" self.layers[l-1]['input'], self.layers[l]['input']) * np.sqrt(2./self.layers[l-1]['input'])\n",
" parameters['b' + str(l)] = np.zeros((self.layers[l]['input'],))\n",
" return parameters\n",
"\n",
" def add(self, input=0, activation=\"\"):\n",
" layer = {}\n",
" layer['input'] = input\n",
" layer['activation'] = activation\n",
" self.layers.append(layer)\n",
" \n",
" def forward_prop(self, W, b, A, activation):\n",
" A_prev = A.copy()\n",
" Z = np.dot(A, W) + b \n",
" \n",
" if activation == 'relu':\n",
" A = relu(Z)\n",
" if activation == 'leakyrelu':\n",
" A = leakyrelu(Z)\n",
" elif activation == 'tanh':\n",
" A = np.tanh(Z)\n",
" elif activation == 'sigmoid':\n",
" A = sigmoid(Z)\n",
" elif activation == 'softmax':\n",
" A = softmax(Z) \n",
" cache = {'A_prev':A_prev, 'Z':Z, 'W':W, 'b':b}\n",
" return A, cache\n",
"\n",
" def L_model_forward(self, parameters, X):\n",
" caches = []\n",
" L = len(parameters) // 2\n",
" A = X.copy()\n",
" for l in range(L):\n",
" activation = self.layers[l+1]['activation']\n",
" W = parameters['W' + str(l+1)]\n",
" b = parameters['b' + str(l+1)]\n",
" A, cache = self.forward_prop(W, b, A, activation)\n",
" caches.append(cache)\n",
" return A, caches\n",
" \n",
" def compute_cost(self, A, Y):\n",
" m = Y.shape[0]\n",
" try:\n",
" cost = -1 / m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))\n",
" except ZeroDivisionError as err:\n",
" print(err) \n",
" os._exit(1)\n",
" return cost\n",
"\n",
" def backward_prop(self, dZ, cache, m, activation):\n",
" Z = cache['Z'] \n",
" A_prev = cache['A_prev'] \n",
" W = cache['W']\n",
" b = cache['b']\n",
" if activation == \"softmax\":\n",
" dW = 1/m * np.dot(A_prev.T, dZ)\n",
" db = 1/m * np.sum(dZ, axis=0)\n",
" dA_prev = np.dot(dZ, W.T) * (1-np.power(A_prev,2))\n",
" ##dA_prev = np.dot(dZ, W.T)\n",
" if activation == \"relu\":\n",
" dZ[Z <= 0] = 0\n",
" dW = 1/m * np.dot(A_prev.T, dZ)\n",
" db = 1/m * np.sum(dZ, axis=0)\n",
" dA_prev = np.dot(dZ, W.T)\n",
" if activation == \"leakyrelu\":\n",
" dZ[Z <= 0] * 0.01\n",
" dW = 1/m * np.dot(A_prev.T, dZ)\n",
" db = 1/m * np.sum(dZ, axis=0)\n",
" dA_prev = np.dot(dZ, W.T)\n",
" if activation == \"tanh\":\n",
" dW = 1/m * np.dot(A_prev.T, dZ)\n",
" db = 1/m * np.sum(dZ, axis=0)\n",
" dA_prev = np.dot(dZ, W.T)\n",
" return dA_prev, dW, db\n",
" \n",
" def L_model_backward(self, A, caches, X, Y):\n",
" \"\"\"\n",
" cache = {'Z':Z, 'A':A, 'A_prev':A_prev, 'W':W, 'b':b}\n",
" \"\"\"\n",
" grads = {}\n",
" m = X.shape[0]\n",
" L = len(caches)\n",
" dZ = A - Y\n",
" grads['dA_prev' + str(L)] = dZ\n",
" for l in reversed(range(L)):\n",
" # Softmax > relu\n",
" activation = self.layers[l+1]['activation']\n",
" dA_prev, dW, db = self.backward_prop(grads['dA_prev' + str(l+1)], caches[l], m, activation)\n",
" grads[\"dA_prev\" + str(l)] = dA_prev\n",
" grads[\"dW\" + str(l+1)] = dW\n",
" grads[\"db\" + str(l+1)] = db\n",
" return grads\n",
"\n",
" def update_parameters(self, parameters, grads, learning_rate):\n",
" L = len(parameters) // 2\n",
" for l in range(1, len(self.layers)):\n",
" parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)] # (m, hidden_size)\n",
" parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)] # (m, hidden_size)\n",
" return parameters\n",
"\n",
" def update_parameters_with_adam(self, parameters, grads, v, s, t, learning_rate,\n",
" beta1=0.9, beta2=0.999, epsilon=1e-8):\n",
" v_corrected = {}\n",
" s_corrected = {}\n",
" for l in range(1, len(self.layers)):\n",
" v['dW' + str(l)] = beta1 * v['dW' + str(l)] + (1 - beta1) * grads['dW' + str(l)]\n",
" v['db' + str(l)] = beta1 * v['db' + str(l)] + (1 - beta1) * grads['db' + str(l)]\n",
" v_corrected['dW' + str(l)] = v['dW' + str(l)] / 1-np.power(beta1, t)\n",
" v_corrected['db' + str(l)] = v['db' + str(l)] / 1-np.power(beta1, t)\n",
" s['dW' + str(l)] = beta2 * s['dW' + str(l)] + (1 - beta2) * np.power(grads['dW' + str(l)], 2)\n",
" s['db' + str(l)] = beta2 * s['db' + str(l)] + (1 - beta2) * np.power(grads['db' + str(l)], 2)\n",
" s_corrected['dW' + str(l)] = s['dW' + str(l)] / 1-np.power(beta2, t)\n",
" s_corrected['db' + str(l)] = s['db' + str(l)] / 1-np.power(beta2, t)\n",
" parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * (\n",
" v_corrected['dW' + str(l)] / (np.sqrt(np.abs(s_corrected['dW' + str(l)])) + epsilon))\n",
" parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * (\n",
" v_corrected['db' + str(l)] / (np.sqrt(np.abs(s_corrected['db' + str(l)])) + epsilon))\n",
" return parameters, v, s\n",
" \n",
" def predict(self, parameters, X):\n",
" return self.L_model_forward(parameters, X)\n",
"\n",
" def evaluate(self, y_true, y_pred):\n",
" return np.sum(y_true * y_pred) / np.count_nonzero(y_true)\n",
" \n",
" def fit(self, X, Y, activation, optimizer, learning_rate=0.005, epochs=2,\n",
" batch_size=64, print_cost=True):\n",
" num_classes = Y.shape[1]\n",
" costs = []\n",
" accus = []\n",
" duration = 0\n",
" total_time = 0\n",
" \n",
" #parameters = self.initialize_parameters()\n",
" parameters = self.initialize_parameters_he()\n",
"\n",
" if optimizer == 'adam':\n",
" t = 0 # adam counter\n",
" v, s = self.initialize_adam(parameters)\n",
" elif optimizer == 'gd':\n",
" pass\n",
"\n",
" for i in range(1, epochs+1):\n",
" start = time.time()\n",
" mini_batches = ramdom_mini_batches(X, Y)\n",
" \n",
" for mini_batch in mini_batches:\n",
" mini_batch_X, mini_batch_Y = mini_batch\n",
" # 1. Forward propagation\n",
" A, caches = self.L_model_forward(parameters, mini_batch_X)\n",
" # 2. Compute cost\n",
" cost = self.compute_cost(A, mini_batch_Y)\n",
" # 3. Backward propagation\n",
" grads = self.L_model_backward(A, caches, mini_batch_X, mini_batch_Y)\n",
" # 4. Update parameters\n",
" if optimizer == 'adam':\n",
" t = t + 1 # Adam counter\n",
" parameters, v, s = self.update_parameters_with_adam(parameters, grads, v, s, t, learning_rate,\n",
" beta1=0.9, beta2=0.999, epsilon=1e-8)\n",
" elif optimizer == 'gd':\n",
" parameters = self.update_parameters(parameters, grads, learning_rate)\n",
" \n",
" # evalueate model\n",
" prob, _ = self.predict(parameters, X) # returns activation(probability) and cache\n",
" prob = to_categorical(prob.argmax(axis=1), num_classes)\n",
" acc = self.evaluate(Y, prob)\n",
" accus.append(acc)\n",
" costs.append(cost)\n",
" \n",
" end = time.time()\n",
" duration += (end - start)\n",
" total_time += duration\n",
" if print_cost and i % 1 == 0:\n",
" # evalueate model\n",
" print(\"{}/{} epochs time:{:.1f}s cost:{:.6f}, acc:{:.6f}\".format(i, epochs, duration, cost, acc))\n",
" duration = 0\n",
" \n",
" return parameters, costs, accus, total_time"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training model\n",
"\n",
"The model is trained by repeating the following steps:\n",
"\n",
"1. L Model Forward propagation\n",
"1. Compute cost\n",
"1. L Model Backward propagation\n",
"1. Update parameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hyper parameters"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>epochs</th>\n",
" <th>batch_size</th>\n",
" <th>activation</th>\n",
" <th>learning_rate</th>\n",
" <th>optimizer</th>\n",
" <th>lambd</th>\n",
" <th>regularization</th>\n",
" <th>print_cost</th>\n",
" <th>layer_dims</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>hyper params</th>\n",
" <td>6</td>\n",
" <td>128</td>\n",
" <td>tanh</td>\n",
" <td>0.05</td>\n",
" <td>gd</td>\n",
" <td>0.1</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>[784, 50, 10]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" epochs batch_size activation learning_rate optimizer lambd \\\n",
"hyper params 6 128 tanh 0.05 gd 0.1 \n",
"\n",
" regularization print_cost layer_dims \n",
"hyper params False True [784, 50, 10] "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"epochs = 6\n",
"batch_size = 128\n",
"optimizer = 'gd' # adam or gd(gradient descent)\n",
"lambd = 0.1\n",
"lr_activation = ('tanh', 0.05)\n",
"#lr_activation = ('tanh', 0.0007) #adam\n",
"#lr_activation = ('relu', 0.0075)\n",
"#lr_activation = ('relu', 0.0005) # adam\n",
"activation = lr_activation[0]\n",
"learning_rate = lr_activation[1]\n",
"regularization = False\n",
"print_cost = True\n",
"layer_dims = [784,50,10]\n",
"\n",
"hyper_params = {}\n",
"hyper_params['epochs'] = epochs\n",
"hyper_params['batch_size'] = batch_size\n",
"hyper_params['activation'] = activation\n",
"hyper_params['learning_rate'] = learning_rate\n",
"hyper_params['optimizer'] = optimizer\n",
"hyper_params['lambd'] = lambd \n",
"hyper_params['regularization'] = regularization\n",
"hyper_params['print_cost'] = print_cost\n",
"hyper_params['layer_dims'] = str(layer_dims)\n",
"\n",
"pd.DataFrame(hyper_params,index=[\"hyper params\",])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define and train model"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'activation': '', 'input': 784},\n",
" {'activation': 'tanh', 'input': 50},\n",
" {'activation': 'softmax', 'input': 10}]\n"
]
}
],
"source": [
"model = DeepNN()\n",
"model.add(input=784)\n",
"model.add(input=50, activation=lr_activation[0])\n",
"model.add(input=10, activation='softmax')\n",
"pprint(model.layers)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"epochs:6\n",
"1/6 epochs time:3.2s cost:0.622414, acc:0.911883\n",
"2/6 epochs time:6.0s cost:0.535589, acc:0.926483\n",
"3/6 epochs time:6.2s cost:0.480691, acc:0.936250\n",
"4/6 epochs time:7.5s cost:0.434851, acc:0.942650\n",
"5/6 epochs time:5.6s cost:0.394968, acc:0.947933\n",
"6/6 epochs time:5.9s cost:0.360442, acc:0.952333\n",
"cost:0.360 accuracy:0.952 time:34.4s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>epochs</th>\n",
" <th>batch_size</th>\n",
" <th>activation</th>\n",
" <th>learning_rate</th>\n",
" <th>optimizer</th>\n",
" <th>lambd</th>\n",
" <th>regularization</th>\n",
" <th>print_cost</th>\n",
" <th>layer_dims</th>\n",
" <th>cost</th>\n",
" <th>accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>deep_l3_tanh_w_6.gz</th>\n",
" <td>6</td>\n",
" <td>128</td>\n",
" <td>tanh</td>\n",
" <td>0.05</td>\n",
" <td>gd</td>\n",
" <td>0.1</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>[784, 50, 10]</td>\n",
" <td>0.360442</td>\n",
" <td>0.952333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" epochs batch_size activation learning_rate optimizer \\\n",
"deep_l3_tanh_w_6.gz 6 128 tanh 0.05 gd \n",
"\n",
" lambd regularization print_cost layer_dims \\\n",
"deep_l3_tanh_w_6.gz 0.1 False True [784, 50, 10] \n",
"\n",
" cost accuracy \n",
"deep_l3_tanh_w_6.gz 0.360442 0.952333 "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"weight_file = 'deep_l{}_{}_w_{}.gz'.format(len(layer_dims), activation, epochs) \n",
"history_file = 'deep_l{}_{}_h_{}.gz'.format(len(layer_dims), activation, epochs)\n",
"\n",
"history_data = []\n",
"print(\"\\nepochs:{}\".format(epochs))\n",
"if not os.path.exists(weight_file):\n",
" parameters, costs, accuracy, total_time = model.fit(X_train, \n",
" categorical_Y_train, \n",
" activation, \n",
" optimizer, \n",
" learning_rate,\n",
" epochs, \n",
" batch_size,\n",
" print_cost=print_cost)\n",
" history_data.append(costs)\n",
" history_data.append(accuracy)\n",
" hyper_params['cost'] = costs[-1]\n",
" hyper_params['accuracy']= accuracy[-1]\n",
" print(\"cost:{:.3f} accuracy:{:.3f} time:{:.1f}s\".format(costs[-1], accuracy[-1], total_time))\n",
" \n",
" # save model weight\n",
" parameters['h'] = hyper_params\n",
" with open(weight_file, 'wb') as f:\n",
" joblib.dump(parameters, f, compress='gzip')\n",
" with open(history_file, 'wb') as f:\n",
" joblib.dump(history_data, f, compress='gzip')\n",
"else:\n",
" with open(weight_file, 'rb') as f:\n",
" parameters = joblib.load(f)\n",
" with open(history_file, 'rb') as f:\n",
" history_data = joblib.load(f)\n",
" print(\"loading parameters from {}\".format(weight_file))\n",
" hyper_params = parameters['h'] \n",
"pd.DataFrame(hyper_params,index=[weight_file,])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Cost and accuracy"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cost:0.360, accuracy:0.952\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"if os.path.exists(history_file):\n",
" with open(history_file, 'rb') as f:\n",
" history_data = joblib.load(f)\n",
" print(\"cost:{:.3f}, accuracy:{:.3f}\".format(history_data[0][-1],history_data[1][-1]))\n",
"plot_history(history_data, learning_rate=learning_rate)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Evaluate model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accuracy with Test data"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"train accuracy: 95.2333%\n",
"test accuracy: 94.8600%\n"
]
}
],
"source": [
"# get all prediction\n",
"prob_train, _ = model.predict(parameters, X_train)\n",
"prob_test, _ = model.predict(parameters, X_test)\n",
"\n",
"# convert probability to prediction(0 ~ 9) \n",
"Y_pred_train = prob_train.argmax(axis=1)\n",
"Y_pred_test = prob_test.argmax(axis=1)\n",
"\n",
"# convert prediction to categorial data\n",
"categorical_Y_pred_train = to_categorical(Y_pred_train, 10)\n",
"categorical_Y_pred_test = to_categorical(Y_pred_test, 10)\n",
"acc_train = model.evaluate(categorical_Y_train, categorical_Y_pred_train)\n",
"acc_test = model.evaluate(categorical_Y_test, categorical_Y_pred_test)\n",
"print(\"train accuracy: {:.4%}\".format(acc_train))\n",
"print(\"test accuracy: {:.4%}\".format(acc_test))"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"showing predction 20 examples randomly...\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x216 with 20 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"num = 20 \n",
"print(\"\\nshowing predction {} examples randomly...\".format(num))\n",
"rand_idx = list(np.random.permutation(num))\n",
"\n",
"show_digits(X_test[rand_idx].reshape(num, 28,28), Y_test[rand_idx], Y_pred=Y_pred_test[rand_idx], num=num)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Failed predictions"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed predictions (514/10000)\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x216 with 20 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"failed_idx = np.where(Y_test != Y_pred_test)[0]\n",
"print(\"Failed predictions ({}/{})\".format(len(failed_idx), Y_test.shape[0]))\n",
"show_digits(X_test[failed_idx].reshape(failed_idx.shape[0], 28,28), Y_test[failed_idx], Y_pred_test[failed_idx], num=20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Confusion matrix and F1 Score"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"confusion_matrix\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 504x432 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"classification_report\n",
"\n",
" precision recall f1-score support\n",
"\n",
" 0 0.99 0.96 0.97 1012\n",
" 1 0.98 0.98 0.98 1136\n",
" 2 0.94 0.94 0.94 1030\n",
" 3 0.94 0.95 0.95 1004\n",
" 4 0.96 0.92 0.94 1028\n",
" 5 0.93 0.94 0.94 883\n",
" 6 0.94 0.96 0.95 946\n",
" 7 0.94 0.96 0.95 1010\n",
" 8 0.94 0.92 0.93 991\n",
" 9 0.91 0.96 0.93 960\n",
"\n",
" accuracy 0.95 10000\n",
" macro avg 0.95 0.95 0.95 10000\n",
"weighted avg 0.95 0.95 0.95 10000\n",
"\n"
]
}
],
"source": [
"from sklearn.metrics import classification_report, confusion_matrix\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"print('confusion_matrix')\n",
"plt.figure(figsize=(7, 6))\n",
"ax = plt.axes()\n",
"sns.heatmap(confusion_matrix(Y_pred_test, Y_test), cmap='Greys', annot=True, fmt=\"d\", ax=ax)\n",
"ax.set_title('mnist digits: lable {}'.format(\"Y_test\"))\n",
"plt.show()\n",
"\n",
"print('\\nclassification_report\\n\\n', classification_report(Y_pred_test, Y_test))\n",
"#report = classification_report(y_pred_test, Y_test, output_dict=True)\n",
"#pd.DataFrame(report).transpose()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Review result"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['./deep_l3_tanh_w_6.gz']\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>epochs</th>\n",
" <th>batch_size</th>\n",
" <th>activation</th>\n",
" <th>learning_rate</th>\n",
" <th>optimizer</th>\n",
" <th>lambd</th>\n",
" <th>regularization</th>\n",
" <th>print_cost</th>\n",
" <th>layer_dims</th>\n",
" <th>cost</th>\n",
" <th>accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>./deep_l3_tanh_w_6.gz</th>\n",
" <td>6</td>\n",
" <td>128</td>\n",
" <td>tanh</td>\n",
" <td>0.05</td>\n",
" <td>gd</td>\n",
" <td>0.1</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>[784, 50, 10]</td>\n",
" <td>0.360442</td>\n",
" <td>0.952333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" epochs batch_size activation learning_rate optimizer \\\n",
"./deep_l3_tanh_w_6.gz 6 128 tanh 0.05 gd \n",
"\n",
" lambd regularization print_cost layer_dims \\\n",
"./deep_l3_tanh_w_6.gz 0.1 False True [784, 50, 10] \n",
"\n",
" cost accuracy \n",
"./deep_l3_tanh_w_6.gz 0.360442 0.952333 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from glob import glob\n",
"\n",
"weight_files = glob('./deep_l*_w_*.gz')\n",
"print(weight_files)\n",
"\n",
"for i, weight_file in enumerate(weight_files):\n",
" with open(weight_file, 'rb') as f:\n",
" parameters = joblib.load(f)\n",
" hyper_params= parameters['h'] \n",
" if i == 0:\n",
" df = pd.DataFrame(hyper_params,index=[weight_file,])\n",
" else:\n",
" df = df.append(pd.DataFrame(hyper_params,index=[weight_file,]))\n",
"df.sort_values(by=['accuracy'], ascending=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "scrach-ml",
"language": "python",
"name": "scrach-ml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "286.997px"
},
"toc_section_display": true,
"toc_window_display": true
},
"toc-autonumbering": true
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment