Skip to content

Instantly share code, notes, and snippets.

@alessiot
Last active October 14, 2019 17:16
Show Gist options
  • Save alessiot/f90d026acd27f50865fb9eb73944358d to your computer and use it in GitHub Desktop.
Save alessiot/f90d026acd27f50865fb9eb73944358d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
" # TensorFlow with an ODE Solver\n",
" \n",
" Adapted from [here](https://medium.com/r/?url=https%3A%2F%2Fwww.tensorflow.org%2Ftutorials%2Feager%2Fcustom_training_walkthrough)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## The Fashion-MNIST Dataset"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"from keras.utils import np_utils\n",
"from keras import datasets\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"\n",
"if not sys.warnoptions:\n",
" import warnings\n",
" warnings.simplefilter(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"def reduce_dataset(x,y, classes=(0,1)):\n",
" x_redu = []\n",
" y_redu = []\n",
" for i in range(len(y)):\n",
" if y[i] in classes:\n",
" x_redu.append(x[i])\n",
" y_redu.append(y[i])\n",
" return np.array(x_redu), np.array(y_redu)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"(X_train, y_train), (X_val, y_val) = datasets.fashion_mnist.load_data()#tf.keras.datasets.mnist.load_data()\n",
"\n",
"X = np.concatenate((X_train, X_val), axis=0)\n",
"y = np.concatenate((y_train, y_val), axis=0)\n",
"\n",
"X, y = reduce_dataset(X, y, (0,1,2,3,4))\n",
"\n",
"# subsample\n",
"X_train, X_val, y_train, y_val = train_test_split(X, y, \n",
" train_size = 500, \n",
" test_size = 500, stratify=y, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([0, 1, 2, 3, 4], dtype=uint8), array([100, 100, 100, 100, 100]))"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.unique(y_train, return_counts=True)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([0, 1, 2, 3, 4], dtype=uint8), array([100, 100, 100, 100, 100]))"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.unique(y_val, return_counts=True)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train matrix shape (500, 28, 28)\n",
"Test matrix shape (500, 28, 28)\n"
]
}
],
"source": [
"# print the initial input shape ready for training\n",
"print(\"Train matrix shape\", X_train.shape)\n",
"print(\"Test matrix shape\", X_val.shape)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X_train shape: (500, 784)\n",
"y_train [2 4 3 0 1 0 0 3 0 1 4 0 3 0 0 4 1 3 3 1 1 1 2 2 4 2 0 4 4 3 4 1 1 2 2 0 0\n",
" 2 1 4 3 4 0 0 2 4 0 2 0 2 1 0 3 2 3 4 4 3 3 0 2 1 2 3 1 0 4 4 4 2 1 3 0 1\n",
" 4 4 2 2 0 1 3 4 2 3 3 2 3 4 2 2 3 0 3 1 1 2 0 4 3 0 4 2 1 0 2 1 0 1 3 3 1\n",
" 2 0 1 4 3 4 1 2 4 3 0 1 2 2 4 0 2 3 2 1 1 1 0 1 4 2 1 2 1 2 4 1 3 1 0 4 4\n",
" 4 2 1 3 0 3 1 4 3 2 3 4 2 2 4 4 2 3 1 3 1 1 0 0 1 0 2 4 4 4 0 1 4 1 1 1 2\n",
" 0 2 3 2 4 3 0 1 2 1 2 0 2 4 3 1 3 1 1 3 0 0 0 2 1 2 1 0 2 1 2 2 3 2 1 3 0\n",
" 4 0 2 1 3 2 4 4 1 3 2 3 1 4 2 3 3 1 1 1 1 0 1 1 2 4 2 0 4 4 4 4 4 0 4 3 2\n",
" 0 4 1 4 3 4 2 3 3 0 4 0 3 1 0 1 4 4 3 1 1 1 2 2 2 1 2 4 1 1 2 4 4 2 0 0 0\n",
" 4 3 2 2 3 1 4 1 0 3 0 4 1 2 3 0 4 2 1 3 1 3 0 0 0 1 0 4 0 0 1 3 1 2 2 3 2\n",
" 3 2 2 2 4 3 1 0 4 2 0 4 2 3 2 2 3 0 2 4 1 3 3 2 2 3 0 3 4 0 4 2 2 4 1 3 0\n",
" 3 0 4 4 4 3 3 0 0 1 3 3 1 4 3 0 4 0 2 4 0 2 0 4 0 3 0 0 2 3 0 0 2 3 1 4 4\n",
" 1 3 0 4 4 1 0 3 3 1 3 3 4 1 3 4 1 0 4 0 3 3 2 0 0 4 1 3 0 2 0 3 1 1 1 2 1\n",
" 4 0 0 0 3 2 3 2 2 1 3 4 4 1 2 3 3 3 1 3 2 3 0 4 0 2 3 4 0 1 4 0 2 1 0 4 2\n",
" 1 0 4 1 4 4 3 4 3 0 3 0 3 4 3 4 2 2 0]\n",
"Shape after one-hot encoding (train): (500, 5)\n",
"Shape after one-hot encoding (val): (500, 5)\n"
]
}
],
"source": [
"# building the input vector from the 28x28 pixels\n",
"X_train = X_train.reshape(X_train.shape[0], 784)\n",
"X_val = X_val.reshape(X_val.shape[0], 784)\n",
"X_train = X_train.astype('float32')\n",
"X_val = X_val.astype('float32')\n",
"\n",
"# normalizing the data to help with the training\n",
"X_train /= 255\n",
"X_val /= 255\n",
"\n",
"print('X_train shape:', X_train.shape)\n",
"print(\"y_train\", y_train)\n",
"\n",
"# one-hot encoding using keras' numpy-related utilities\n",
"n_classes = 5\n",
"y_train = np_utils.to_categorical(y_train, n_classes)\n",
"y_val = np_utils.to_categorical(y_val, n_classes)\n",
"print(\"Shape after one-hot encoding (train): \", y_train.shape)\n",
"print(\"Shape after one-hot encoding (val): \", y_val.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## TensorFlow custom training"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TensorFlow version: 1.14.0\n",
"Eager execution: True\n"
]
}
],
"source": [
"from __future__ import absolute_import, division, print_function, unicode_literals\n",
"\n",
"import os\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import tensorflow as tf\n",
"\n",
"tf.enable_eager_execution()\n",
"\n",
"from tensorflow import contrib\n",
"tfe = contrib.eager\n",
"\n",
"from scipy.integrate import ode\n",
"import multiprocessing as mp\n",
"\n",
"print(\"TensorFlow version: {}\".format(tf.__version__))\n",
"print(\"Eager execution: {}\".format(tf.executing_eagerly()))"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<DatasetV1Adapter shapes: ((?, 784), (?, 5)), types: (tf.float32, tf.float32)>\n",
"Prediction: [1 4 1 4 0 0 0 0 1 4 1 0 1 1 4 0 4 0 0 4 0 4 0 1 1 1 4 1 1 0 1 4 4 1 0 1 1\n",
" 1 4 1 0 1 0 0 1 1 1 1 4 1 4 4 4 4 0 4 4 4 0 1 1 1 1 4 4 0 1 0 0 1 4 4 0 4\n",
" 4 0 1 1 4 4 0 1 1 0 4 1 4 1 1 0 0 0 4 1 4 1 4 0 0 1 1 0 4 4 1 2 1 4 4 0 1\n",
" 1 1 0 4 1 1 4 1 0 4 4 1 1 1 1 4 1 0 4 0 4 4 1 4 0 4 4 1 4 1 4 4 0 4 4 1 0\n",
" 0 4 4 0 4 1 1 0 0 1 4 1 1 1 1 1 0 0 4 0 4 4 1 4 4 4 1 0 4 1 2 4 0 4 4 4 1\n",
" 4 1 0 1 1 0 0 4 1 1 1 4 1 1 0 4 0 4 4 4 4 1 0 0 1 1 4 4 1 4 1 1 0 1 4 4 0\n",
" 1 0 1 4 1 0 0 1 1 4 2 0 0 1 1 4 1 4 4 4 4 1 4 4 1 0 0 0 1 4 1 0 1 1 0 0 1\n",
" 4 1 0 4 4 1 0 4 1 1 4 4 4 4 2 4 1 1 0 4 4 1 4 1 1 4 1 1 4 1 0 1 0 1 0 0 1\n",
" 1 0 1 1 1 4 4 4 0 0 4 4 4 1 0 1 1 0 4 4 4 0 0 4 4 4 0 4 1 2 4 0 1 1 1 0 1\n",
" 1 1 4 0 0 4 4 4 0 1 1 1 1 0 1 1 4 1 1 0 4 4 0 1 0 4 0 0 4 4 1 1 1 0 4 0 4\n",
" 4 4 4 0 1 0 4 4 4 4 4 0 4 0 4 4 1 4 1 4 4 1 4 0 1 4 1 0 0 0 1 1 1 4 4 0 0\n",
" 4 4 0 1 0 4 4 4 4 4 0 1 0 4 0 0 0 1 4 0 0 0 1 1 4 1 4 4 1 1 1 1 4 4 1 1 4\n",
" 0 2 1 1 0 1 0 1 1 4 1 1 0 1 1 0 1 0 1 4 0 4 1 1 1 1 4 4 4 4 0 4 0 4 0 1 1\n",
" 4 4 1 1 0 1 1 1 4 4 0 1 0 4 0 1 4 4 4]\n",
" Labels: [[0. 0. 1. 0. 0.]\n",
" [0. 0. 0. 0. 1.]\n",
" [0. 0. 0. 1. 0.]\n",
" ...\n",
" [0. 0. 1. 0. 0.]\n",
" [0. 0. 1. 0. 0.]\n",
" [1. 0. 0. 0. 0.]]\n",
"Loss test: 1.6121267080307007\n",
"Step: 0, Initial Loss: 1.6121267080307007\n",
"Step: 1, Loss: 1.610520839691162\n"
]
}
],
"source": [
"batch_size = X_train.shape[0]#50\n",
"\n",
"features, labels = X_train, y_train\n",
"features_val, labels_val = X_val, y_val\n",
"\n",
"features = tf.convert_to_tensor(features)\n",
"labels = tf.convert_to_tensor(labels)\n",
"features_val = tf.convert_to_tensor(features_val)\n",
"labels_val = tf.convert_to_tensor(labels_val)\n",
"\n",
"train_dataset = tf.data.Dataset.from_tensor_slices((features, labels))\n",
"train_dataset = train_dataset.shuffle(X_train.shape[0])\n",
"train_dataset = train_dataset.batch(batch_size=batch_size)#X_train_04.shape[0]\n",
"print(train_dataset)\n",
"\n",
"#for x, y in train_dataset:\n",
"# print(x.shape, y.shape)\n",
"\n",
"\n",
"layer_dims = [[784,512],[512],[512,512],[512],[512,n_classes],[n_classes]]\n",
" \n",
"model = tf.keras.Sequential([\n",
" tf.keras.layers.Dense(layer_dims[0][1], activation=tf.nn.relu, input_shape=(layer_dims[0][0],)), \n",
" tf.keras.layers.Dense(layer_dims[2][1], activation=tf.nn.relu), \n",
" tf.keras.layers.Dense(layer_dims[4][1], activation=tf.nn.softmax)\n",
"])\n",
"\n",
"# save weights to reinitialize later to same ones\n",
"Wsave = model.get_weights()\n",
"\n",
"predictions = model(features)\n",
"#predictions[:5]\n",
"\n",
"#tf.nn.softmax(predictions[:5])\n",
"\n",
"print(\"Prediction: {}\".format(tf.argmax(predictions, axis=1)))\n",
"print(\" Labels: {}\".format(labels))\n",
"\n",
"def loss(model, x, y):\n",
"    \"\"\"Mean categorical cross-entropy of `model` on the batch (x, y).\n",
"\n",
"    The model's last layer already applies a softmax, so `model(x)` holds class\n",
"    probabilities, not logits. The cross-entropy is therefore computed directly\n",
"    on those probabilities; feeding them to a loss that expects logits would\n",
"    apply softmax a second time and flatten both the loss and its gradients.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : callable mapping a batch of inputs to per-class probabilities\n",
"    x : batch of input features\n",
"    y : one-hot labels with one row per sample of `x`\n",
"\n",
"    Returns\n",
"    -------\n",
"    Scalar tensor: the cross-entropy averaged over the batch.\n",
"    \"\"\"\n",
"    probs = model(x)\n",
"    per_sample = tf.keras.losses.categorical_crossentropy(y, probs)\n",
"    return tf.reduce_mean(per_sample)\n",
"\n",
"l = loss(model, features, labels)\n",
"print(\"Loss test: {}\".format(l))\n",
"\n",
"\n",
"def grad(model, inputs, targets):\n",
"    \"\"\"Return the batch loss and its gradients w.r.t. the model's trainable variables.\n",
"\n",
"    The gradients come back in the same order as `model.trainable_variables`.\n",
"    \"\"\"\n",
"    with tf.GradientTape() as tape:\n",
"        # The forward pass runs under the tape so that it can be differentiated.\n",
"        batch_loss = loss(model, inputs, targets)\n",
"    gradients = tape.gradient(batch_loss, model.trainable_variables)\n",
"    return batch_loss, gradients\n",
"\n",
"optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)\n",
"\n",
"global_step = tf.Variable(0)\n",
"\n",
"loss_value, grads = grad(model, features, labels)\n",
"\n",
"print(\"Step: {}, Initial Loss: {}\".format(global_step.numpy(),\n",
" loss_value.numpy()))\n",
"\n",
"optimizer.apply_gradients(zip(grads, model.trainable_variables), global_step)\n",
"\n",
"print(\"Step: {}, Loss: {}\".format(global_step.numpy(),\n",
" loss(model, features, labels).numpy()))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(35000, 784) (35000,)\n",
"Shape before one-hot encoding: (35000,)\n",
"Shape after one-hot encoding: (35000, 5)\n"
]
}
],
"source": [
"X_all = X.copy()\n",
"y_all = y.copy()\n",
"\n",
"# building the input vector from the 28x28 pixels\n",
"X_all = X_all.reshape(X_all.shape[0], 784)\n",
"X_all = X_all.astype('float32')\n",
"\n",
"# normalizing the data to help with the training\n",
"X_all /= 255\n",
"\n",
"print(X_all.shape, y_all.shape)\n",
"\n",
"# one-hot encoding using keras' numpy-related utilities\n",
"print(\"Shape before one-hot encoding: \", y_all.shape)\n",
"y_all = np_utils.to_categorical(y_all, n_classes)\n",
"print(\"Shape after one-hot encoding: \", y_all.shape)\n",
"\n",
"features_all, labels_all = X_all, y_all\n",
"features_all = tf.convert_to_tensor(features_all)\n",
"labels_all = tf.convert_to_tensor(labels_all)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# indexes to update weights\n",
"prev_idx = 0\n",
"idx2 = []\n",
"idx1 = []\n",
"for i in range(len(layer_dims)):\n",
" idx1.append(prev_idx)\n",
" curr_idx = np.prod(layer_dims[i])\n",
" prev_idx = prev_idx + curr_idx\n",
" idx2.append(prev_idx)\n",
"\n",
"def dWdt(t, u, f_args):\n",
"    \"\"\"Right-hand side of the weight ODE: the negative loss gradient at weights `u`.\n",
"\n",
"    Side effect: the model's trainable variables are overwritten with `u`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    t : integration time supplied by the solver; not used.\n",
"    u : flat array holding every trainable parameter, laid out by idx1 / idx2.\n",
"    f_args : sequence (model, x, y, layer_dims, idx1, idx2).\n",
"\n",
"    Returns\n",
"    -------\n",
"    Flat numpy array with the negated gradients, concatenated in variable order.\n",
"    \"\"\"\n",
"    model, x, y = f_args[0], f_args[1], f_args[2]\n",
"    layer_dims, idx1, idx2 = f_args[3], f_args[4], f_args[5]\n",
"\n",
"    # Load the candidate weights into the model, one variable at a time.\n",
"    for k, shape in enumerate(layer_dims):\n",
"        tf.assign(model.trainable_variables[k], u[idx1[k]:idx2[k]].reshape(shape))\n",
"\n",
"    _, gradients = grad(model, x, y)\n",
"\n",
"    # The loss is to be minimised, so the weights move against the gradient.\n",
"    negated = [-1 * g.numpy().flatten() for g in gradients]\n",
"    return np.concatenate(negated)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"from sys import stdout\n",
"\n",
"def train_tf(num_epochs, stiff_training, train_dataset, optimizer, max_epoch_no_impr = 50, epoch_to_vis=50):\n",
"    \"\"\"Train the notebook-level `model` and record per-epoch metrics.\n",
"\n",
"    Besides its arguments the function uses names defined in earlier cells:\n",
"    `model`, `layer_dims`, `idx1`, `idx2`, `global_step`, `features_val` /\n",
"    `labels_val`, `features_all` / `labels_all`, `loss`, `grad` and `dWdt`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    num_epochs : int\n",
"        Maximum number of passes over `train_dataset`.\n",
"    stiff_training : bool\n",
"        True: for every batch the flattened weights are advanced by integrating\n",
"        `dWdt` with scipy's `ode` solver (`optimizer` is not used).\n",
"        False: one `optimizer.apply_gradients` step is taken per batch.\n",
"    train_dataset : iterable yielding (x, y) batches\n",
"    optimizer : object with `apply_gradients`; may be None when `stiff_training` is True\n",
"    max_epoch_no_impr : int\n",
"        Training stops once more than this many epochs have passed since the\n",
"        validation accuracy last improved by more than 0.005.\n",
"    epoch_to_vis : int\n",
"        A progress line is written every `epoch_to_vis` epochs.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        (train_loss, train_accuracy, val_loss, val_accuracy, all_loss,\n",
"        all_accuracy, best_weights, times). All but `best_weights` are lists\n",
"        with one entry per completed epoch; `times` holds the seconds elapsed\n",
"        since the start; `best_weights` is `model.get_weights()` taken at the\n",
"        epoch with the best validation accuracy.\n",
"    \"\"\"\n",
"    times = []\n",
"    train_loss_results = []\n",
"    train_accuracy_results = []\n",
"    val_loss_results = []\n",
"    val_accuracy_results = []\n",
"    all_loss_results = []\n",
"    all_accuracy_results = []\n",
"\n",
"    best_validation_accuracy = 0.0\n",
"    last_improvement = 0\n",
"    improved_str = ''\n",
"    impr_perc = 0\n",
"    best_weights = model.get_weights()\n",
"\n",
"    progress_fmt = (\"time (s): {:03f}, Epoch: {:03d}, Loss: {:.3f}, Accuracy: {:.3%}, \"\n",
"                    \"Val Loss: {:.3f}, Val Accuracy: {:.3%}, Full Loss: {:.3f}, \"\n",
"                    \"Full Accuracy: {:.3%}, {}, last best: {}, impr %: {:.3%}\")\n",
"\n",
"    def _class_ids(scores):\n",
"        \"\"\"Index of the largest entry of every row (predicted or true class).\"\"\"\n",
"        return tf.argmax(scores, axis=1, output_type=tf.int32)\n",
"\n",
"    def _progress_line():\n",
"        \"\"\"Format the metrics of the current epoch; only called inside the epoch loop.\"\"\"\n",
"        return progress_fmt.format(time.time() - start_time,\n",
"                                   epoch,\n",
"                                   epoch_loss_avg.result(),\n",
"                                   epoch_accuracy.result(),\n",
"                                   epoch_loss_avg_val.result(),\n",
"                                   epoch_accuracy_val.result(),\n",
"                                   epoch_loss_avg_all.result(),\n",
"                                   epoch_accuracy_all.result(),\n",
"                                   improved_str,\n",
"                                   -epoch + last_improvement,\n",
"                                   impr_perc)\n",
"\n",
"    start_time = time.time()\n",
"\n",
"    # Initial weights as one flat numpy vector (layout described by idx1 / idx2).\n",
"    w0 = np.concatenate([p.numpy().flatten() for p in model.trainable_variables])\n",
"\n",
"    ##### ODE solver - initial settings\n",
"    t0 = 0.0\n",
"    tfin = 0.125\n",
"    solver = ode(dWdt)\n",
"    # The weights are real numbers, so the real-valued \"vode\" integrator is used;\n",
"    # \"zvode\" is its complex-valued variant and would carry the state as complex.\n",
"    solver.set_integrator(\"vode\", atol=1e-8, rtol=1e-6)\n",
"\n",
"    for epoch in range(num_epochs):\n",
"        epoch_loss_avg = tfe.metrics.Mean()\n",
"        epoch_accuracy = tfe.metrics.Accuracy()\n",
"        epoch_loss_avg_val = tfe.metrics.Mean()\n",
"        epoch_accuracy_val = tfe.metrics.Accuracy()\n",
"        epoch_loss_avg_all = tfe.metrics.Mean()\n",
"        epoch_accuracy_all = tfe.metrics.Accuracy()\n",
"\n",
"        # Training loop - using mini-batches\n",
"        for x, y in train_dataset:\n",
"            if stiff_training:\n",
"                # Integrate the weights from t0 to tfin on this batch.\n",
"                solver.set_initial_value(w0, t0)\n",
"                solver.set_f_params((model, x, y, layer_dims, idx1, idx2))\n",
"                solver.integrate(tfin, step=False)\n",
"                w0 = solver.y\n",
"                # dWdt leaves the weights of its last evaluation in the model;\n",
"                # overwrite them with the integrated solution.\n",
"                for i, shape in enumerate(layer_dims):\n",
"                    tf.assign(model.trainable_variables[i], w0[idx1[i]:idx2[i]].reshape(shape))\n",
"                # Loss at the integrated weights (no gradient is needed here).\n",
"                loss_value = loss(model, x, y)\n",
"            else:\n",
"                # The recorded loss is the one measured before the optimizer step.\n",
"                loss_value, grads = grad(model, x, y)\n",
"                optimizer.apply_gradients(zip(grads, model.trainable_variables),\n",
"                                          global_step)\n",
"\n",
"            # Track progress\n",
"            epoch_loss_avg(loss_value)  # add current batch loss\n",
"            epoch_loss_avg_val(loss(model, features_val, labels_val))  # add current validation\n",
"            epoch_loss_avg_all(loss(model, features_all, labels_all))\n",
"            # compare predicted label to actual label\n",
"            epoch_accuracy(_class_ids(model(x)), _class_ids(y))\n",
"            epoch_accuracy_val(_class_ids(model(features_val)), _class_ids(labels_val))\n",
"            epoch_accuracy_all(_class_ids(model(features_all)), _class_ids(labels_all))\n",
"\n",
"        # end epoch\n",
"        train_loss_results.append(epoch_loss_avg.result())\n",
"        train_accuracy_results.append(epoch_accuracy.result())\n",
"        val_loss_results.append(epoch_loss_avg_val.result())\n",
"        val_accuracy_results.append(epoch_accuracy_val.result())\n",
"        all_loss_results.append(epoch_loss_avg_all.result())\n",
"        all_accuracy_results.append(epoch_accuracy_all.result())\n",
"        times.append(time.time() - start_time)\n",
"\n",
"        # Change of the validation accuracy relative to the best value so far.\n",
"        impr_perc = epoch_accuracy_val.result() - best_validation_accuracy\n",
"        if impr_perc > 0.005:\n",
"            # Update the best-known validation accuracy.\n",
"            best_validation_accuracy = epoch_accuracy_val.result()\n",
"            best_weights = model.get_weights()\n",
"\n",
"            # Set the iteration for the last improvement to current.\n",
"            last_improvement = epoch\n",
"\n",
"            # A string to be printed below, shows improvement found.\n",
"            improved_str = 'BEST (Val): ' + '{:.3%}'.format(epoch_accuracy_val.result())\n",
"\n",
"        if epoch % epoch_to_vis == 0:\n",
"            # The carriage return makes the next progress line overwrite this one.\n",
"            stdout.write(_progress_line() + \"\\r\")\n",
"            stdout.flush()\n",
"\n",
"        # If no improvement found in the required number of iterations.\n",
"        if epoch - last_improvement > max_epoch_no_impr:\n",
"            stdout.write(\"\\n\")  # move the cursor to the next line\n",
"            print(_progress_line())\n",
"            print(\"No improvement found in a while, stopping optimization.\")\n",
"            # Break out from the for-loop.\n",
"            break\n",
"\n",
"        # The next epoch continues from the time the solver reached and the end\n",
"        # of the integration window doubles; without the ODE branch solver.t stays 0.\n",
"        t0 = solver.t\n",
"        tfin = solver.t * 2\n",
"\n",
"    stdout.write(\"\\n\")  # move the cursor to the next line\n",
"\n",
"    return train_loss_results, train_accuracy_results, val_loss_results, val_accuracy_results, all_loss_results, all_accuracy_results, best_weights, times\n"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<tensorflow.python.training.gradient_descent.GradientDescentOptimizer object at 0x1c504a5128>\n",
"time (s): 88.328985, Epoch: 200, Loss: 1.048, Accuracy: 88.600%, Val Loss: 1.090, Val Accuracy: 83.400%, Full Loss: 1.089, Full Accuracy: 83.317%, BEST (Val): 85.400%, last best: -11, impr %: -2.000%\n",
"time (s): 105.511589, Epoch: 240, Loss: 1.035, Accuracy: 90.000%, Val Loss: 1.085, Val Accuracy: 83.000%, Full Loss: 1.084, Full Accuracy: 83.437%, BEST (Val): 85.400%, last best: -51, impr %: -2.400%\n",
"No improvement found in a while, stopping optimization.\n",
"\n",
"<tensorflow.python.training.momentum.MomentumOptimizer object at 0x1c504a5400>\n",
"time (s): 44.019544, Epoch: 100, Loss: 0.995, Accuracy: 93.000%, Val Loss: 1.083, Val Accuracy: 82.000%, Full Loss: 1.077, Full Accuracy: 82.494%, BEST (Val): 85.200%, last best: -39, impr %: -3.200%\n",
"time (s): 49.564251, Epoch: 112, Loss: 0.970, Accuracy: 94.000%, Val Loss: 1.071, Val Accuracy: 82.600%, Full Loss: 1.067, Full Accuracy: 83.674%, BEST (Val): 85.200%, last best: -51, impr %: -2.600%\n",
"No improvement found in a while, stopping optimization.\n",
"\n",
"<tensorflow.python.training.adadelta.AdadeltaOptimizer object at 0x1c504a55c0>\n",
"time (s): 91.573524, Epoch: 200, Loss: 1.073, Accuracy: 89.000%, Val Loss: 1.097, Val Accuracy: 84.400%, Full Loss: 1.097, Full Accuracy: 84.106%, BEST (Val): 85.000%, last best: -9, impr %: -0.600%%\n",
"time (s): 110.399443, Epoch: 242, Loss: 1.056, Accuracy: 89.000%, Val Loss: 1.087, Val Accuracy: 85.000%, Full Loss: 1.087, Full Accuracy: 84.529%, BEST (Val): 85.000%, last best: -51, impr %: 0.000%\n",
"No improvement found in a while, stopping optimization.\n",
"\n"
]
}
],
"source": [
"### Full dataset\n",
"\n",
"num_epochs = 5001\n",
"stiff_training = False\n",
"\n",
"optimizers = [tf.train.GradientDescentOptimizer(learning_rate=0.1),\n",
" tf.train.MomentumOptimizer(use_nesterov = True, learning_rate = 0.1, momentum=0.9),\n",
" tf.train.AdadeltaOptimizer(learning_rate=0.1)\n",
" ]\n",
"\n",
"train_loss = []\n",
"train_accuracy = []\n",
"val_loss = []\n",
"val_accuracy = []\n",
"all_loss = []\n",
"all_accuracy = []\n",
"weights_save = []\n",
"train_times = []\n",
"for optimizer in optimizers:\n",
" print(optimizer)\n",
" model.set_weights(Wsave) # reset weights as for the previous training\n",
" train_loss_results, train_accuracy_results, val_loss_results, val_accuracy_results, all_loss_results, all_accuracy_results, best_weights, times = train_tf(num_epochs, stiff_training, train_dataset, optimizer, epoch_to_vis=50)\n",
" train_loss.append(train_loss_results) \n",
" train_accuracy.append(train_accuracy_results)\n",
" val_loss.append(val_loss_results)\n",
" val_accuracy.append(val_accuracy_results)\n",
" all_loss.append(all_loss_results)\n",
" all_accuracy.append(all_accuracy_results)\n",
" weights_save.append(best_weights)\n",
" train_times.append(times)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"model.set_weights(Wsave) # reset weights as for the previous training"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"time (s): 871.094145, Epoch: 030, Loss: 1.018, Accuracy: 92.000%, Val Loss: 1.073, Val Accuracy: 84.600%, Full Loss: 1.071, Full Accuracy: 84.789%, BEST (Val): 85.200%, last best: -11, impr %: -0.600%\n",
"time (s): 871.095756, Epoch: 030, Loss: 1.018, Accuracy: 92.000%, Val Loss: 1.073, Val Accuracy: 84.600%, Full Loss: 1.071, Full Accuracy: 84.789%, BEST (Val): 85.200%, last best: -11, impr %: -0.600%\n",
"No improvement found in a while, stopping optimization.\n",
"\n"
]
}
],
"source": [
"num_epochs = 501\n",
"stiff_training = True\n",
"\n",
"train_loss_results_ode, train_accuracy_results_ode, val_loss_results_ode, val_accuracy_results_ode, all_loss_results_ode, all_accuracy_results_ode, best_weights_ode, times_ode = train_tf(num_epochs, stiff_training, train_dataset, optimizer=None, max_epoch_no_impr=10, epoch_to_vis=2)\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Accuracy on the whole data set after every epoch, one series per training method.\n",
"# Epochs are numbered from 1: the x-axis is logarithmic, and a point at x = 0\n",
"# (the first epoch under 0-based numbering) cannot be drawn on it.\n",
"accuracy_curves = [\n",
"    (all_accuracy[0], 'g', 'Gradient Descend (0.1)'),\n",
"    (all_accuracy[1], 'c', 'Nesterov (0.1)'),\n",
"    (all_accuracy[2], 'm', 'Adadelta (0.1)'),\n",
"    (all_accuracy_results_ode, 'b', 'ODE Solver'),\n",
"]\n",
"for curve, colour, label in accuracy_curves:\n",
"    plt.plot(range(1, len(curve) + 1), curve, '.', color=colour, label=label)\n",
"plt.semilogx()\n",
"plt.xlabel('Number of Iterations')\n",
"plt.ylabel('Accuracy (Whole Dataset)')\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(train_times[0], all_accuracy[0], '.', color='g', label='Gradient Descend (0.1)')\n",
"plt.plot(train_times[1], all_accuracy[1], '.', color='c', label='Nesterov (0.1)')\n",
"plt.plot(train_times[2], all_accuracy[2], '.', color='m', label='Adadelta (0.1)')\n",
"plt.plot(times_ode, all_accuracy_results_ode, '.', color='b',label='ODE Solver')#o-\n",
"plt.semilogx()\n",
"plt.xlabel('Time [s]')\n",
"plt.ylabel('Accuracy (Whole Dataset)')\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 84.386%\n",
"1 85.077%\n",
"2 83.786%\n"
]
}
],
"source": [
"for i in range(len(weights_save)):\n",
" model.set_weights(weights_save[i])\n",
" accuracy_all = tfe.metrics.Accuracy()\n",
" accuracy_all(tf.argmax(model(features_all), axis=1, output_type=tf.int32),\n",
" tf.argmax(labels_all, axis=1, output_type=tf.int32))\n",
" print(i, '{:.3%}'.format(accuracy_all.result()))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"84.531%\n"
]
}
],
"source": [
"model.set_weights(best_weights_ode)\n",
"accuracy_all = tfe.metrics.Accuracy()\n",
"accuracy_all(tf.argmax(model(features_all), axis=1, output_type=tf.int32),\n",
" tf.argmax(labels_all, axis=1, output_type=tf.int32))\n",
"print('{:.3%}'.format(accuracy_all.result()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment