Skip to content

Instantly share code, notes, and snippets.

@robinkraft
Last active November 29, 2016 03:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robinkraft/dcd103d7e29f6e9d06f0b5685cc2a44a to your computer and use it in GitHub Desktop.
Showing difference between different methods for changing learning rates in Keras.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"path = '/home/ubuntu/nbs/data/statefarm/'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from keras.models import Sequential\n",
"from keras.layers.core import Flatten, Dense, Dropout, Lambda\n",
"from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D\n",
"from keras.layers import BatchNormalization\n",
"from keras.optimizers import SGD, RMSprop, Adam\n",
"from keras.preprocessing import image\n",
"from keras.regularizers import l2"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from utils import *"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1500 images belonging to 10 classes.\n",
"Found 500 images belonging to 10 classes.\n"
]
}
],
"source": [
"batch_size = 64\n",
"spath = os.path.join(path, 'sample/')\n",
"batches = get_batches(spath+'train', batch_size=batch_size)\n",
"val_batches = get_batches(spath+'valid', batch_size=batch_size*2, shuffle=False)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/2\n",
"1500/1500 [==============================] - 25s - loss: 2.3571 - acc: 0.1827 - val_loss: 4.0777 - val_acc: 0.1340\n",
"Epoch 2/2\n",
"1500/1500 [==============================] - 21s - loss: 1.7186 - acc: 0.4460 - val_loss: 3.0927 - val_acc: 0.1740\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd1561900d0>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = Sequential([\n",
" BatchNormalization(axis=1, input_shape=(3,224,224)),\n",
" Flatten(),\n",
" Dense(10, activation='softmax')\n",
" ])\n",
"model.compile(Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n",
"model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, \n",
" nb_val_samples=val_batches.nb_sample)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<CudaNdarrayType(float32, scalar)>\n",
"9.99999974738e-06\n"
]
},
{
"ename": "AttributeError",
"evalue": "'float' object has no attribute 'get_value'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-30-26351f4bb067>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.001\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'float' object has no attribute 'get_value'"
]
}
],
"source": [
"print model.optimizer.lr\n",
"print model.optimizer.lr.get_value()\n",
"model.optimizer.lr = 0.001\n",
"print model.optimizer.lr.get_value()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Interesting! It starts out as a CudaNdarray scalar equal to `0.000009999...` (i.e. `1e-5`). When we then replace with the new learning rate, we can't use `.get_value()` anymore since `lr` is a float."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.001\n"
]
}
],
"source": [
"print model.optimizer.lr"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ok, no big deal. Let's run the next few epochs with the new learning rate."
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/4\n",
"1500/1500 [==============================] - 25s - loss: 1.2876 - acc: 0.6360 - val_loss: 2.5079 - val_acc: 0.2160\n",
"Epoch 2/4\n",
"1500/1500 [==============================] - 20s - loss: 1.0604 - acc: 0.7273 - val_loss: 2.5639 - val_acc: 0.2360\n",
"Epoch 3/4\n",
"1500/1500 [==============================] - 19s - loss: 0.8822 - acc: 0.8073 - val_loss: 2.2749 - val_acc: 0.2640\n",
"Epoch 4/4\n",
"1500/1500 [==============================] - 19s - loss: 0.7658 - acc: 0.8387 - val_loss: 2.2044 - val_acc: 0.3080\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd156713bd0>"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches, \n",
" nb_val_samples=val_batches.nb_sample)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cool - 83% accuracy, with 30% validation accuracy. Let's see what happens with `.set_value`."
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/2\n",
"1500/1500 [==============================] - 25s - loss: 2.4243 - acc: 0.1900 - val_loss: 5.7273 - val_acc: 0.1780\n",
"Epoch 2/2\n",
"1500/1500 [==============================] - 19s - loss: 1.7234 - acc: 0.4207 - val_loss: 3.5982 - val_acc: 0.1760\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd1550d1a90>"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = Sequential([\n",
" BatchNormalization(axis=1, input_shape=(3,224,224)),\n",
" Flatten(),\n",
" Dense(10, activation='softmax')\n",
" ])\n",
"model.compile(Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n",
"model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, \n",
" nb_val_samples=val_batches.nb_sample)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<CudaNdarrayType(float32, scalar)>\n",
"9.99999974738e-06\n",
"0.0010000000475\n"
]
}
],
"source": [
"print model.optimizer.lr\n",
"print model.optimizer.lr.get_value()\n",
"model.optimizer.lr.set_value(0.001)\n",
"print model.optimizer.lr.get_value()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/4\n",
"1500/1500 [==============================] - 25s - loss: 12.0653 - acc: 0.2060 - val_loss: 14.2787 - val_acc: 0.1040\n",
"Epoch 2/4\n",
"1500/1500 [==============================] - 19s - loss: 12.5922 - acc: 0.2113 - val_loss: 13.8512 - val_acc: 0.1200\n",
"Epoch 3/4\n",
"1500/1500 [==============================] - 19s - loss: 12.6270 - acc: 0.2080 - val_loss: 14.0240 - val_acc: 0.1160\n",
"Epoch 4/4\n",
"1500/1500 [==============================] - 19s - loss: 12.3764 - acc: 0.2260 - val_loss: 12.7715 - val_acc: 0.1960\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd154d7a050>"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches, \n",
" nb_val_samples=val_batches.nb_sample)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And so we end up with 22% accuracy and 19% validation accuracy. \n",
"With the attribute-assignment method (`model.optimizer.lr = 0.001`), we had 83% accuracy and 30% validation accuracy."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now for a control group!"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/6\n",
"1500/1500 [==============================] - 25s - loss: 2.3110 - acc: 0.2273 - val_loss: 4.7475 - val_acc: 0.1480\n",
"Epoch 2/6\n",
"1500/1500 [==============================] - 23s - loss: 1.6814 - acc: 0.4580 - val_loss: 3.6211 - val_acc: 0.1640\n",
"Epoch 3/6\n",
"1500/1500 [==============================] - 20s - loss: 1.2803 - acc: 0.6320 - val_loss: 2.9282 - val_acc: 0.1740\n",
"Epoch 4/6\n",
"1500/1500 [==============================] - 20s - loss: 1.0394 - acc: 0.7387 - val_loss: 2.4401 - val_acc: 0.2000\n",
"Epoch 5/6\n",
"1500/1500 [==============================] - 19s - loss: 0.8947 - acc: 0.7940 - val_loss: 2.3736 - val_acc: 0.2120\n",
"Epoch 6/6\n",
"1500/1500 [==============================] - 20s - loss: 0.7411 - acc: 0.8493 - val_loss: 2.2444 - val_acc: 0.2800\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd153d47050>"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = Sequential([\n",
" BatchNormalization(axis=1, input_shape=(3,224,224)),\n",
" Flatten(),\n",
" Dense(10, activation='softmax')\n",
" ])\n",
"model.compile(Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n",
"model.fit_generator(batches, batches.nb_sample, nb_epoch=6, validation_data=val_batches, \n",
" nb_val_samples=val_batches.nb_sample)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With no change to the learning rate, we get 85% accuracy and 28% validation accuracy. That's pretty close to the results from Jeremy's first model adjusting the learning rate via `model.optimizer.lr=0.001`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So I would bet that the `lr=` method doesn't actually affect the model. Keras is keeping track of the actual value behind the scenes."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`equals : 83% acc 30% val_acc` \n",
"`set_value : 22% acc 19% val_acc` \n",
"`no change : 85% acc 28% val_acc`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment