Last active
November 29, 2016 03:17
-
-
Save robinkraft/dcd103d7e29f6e9d06f0b5685cc2a44a to your computer and use it in GitHub Desktop.
Showing difference between different methods for changing learning rates in Keras.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"path = '/home/ubuntu/nbs/data/statefarm/'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from keras.models import Sequential\n", | |
"from keras.layers.core import Flatten, Dense, Dropout, Lambda\n", | |
"from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D\n", | |
"from keras.layers import BatchNormalization\n", | |
"from keras.optimizers import SGD, RMSprop, Adam\n", | |
"from keras.preprocessing import image\n", | |
"from keras.regularizers import l2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from utils import *" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Found 1500 images belonging to 10 classes.\n", | |
"Found 500 images belonging to 10 classes.\n" | |
] | |
} | |
], | |
"source": [ | |
"batch_size = 64\n", | |
"spath = os.path.join(path, 'sample/')\n", | |
"batches = get_batches(spath+'train', batch_size=batch_size)\n", | |
"val_batches = get_batches(spath+'valid', batch_size=batch_size*2, shuffle=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/2\n", | |
"1500/1500 [==============================] - 25s - loss: 2.3571 - acc: 0.1827 - val_loss: 4.0777 - val_acc: 0.1340\n", | |
"Epoch 2/2\n", | |
"1500/1500 [==============================] - 21s - loss: 1.7186 - acc: 0.4460 - val_loss: 3.0927 - val_acc: 0.1740\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7fd1561900d0>" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = Sequential([\n", | |
" BatchNormalization(axis=1, input_shape=(3,224,224)),\n", | |
" Flatten(),\n", | |
" Dense(10, activation='softmax')\n", | |
" ])\n", | |
"model.compile(Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", | |
"model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, \n", | |
" nb_val_samples=val_batches.nb_sample)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<CudaNdarrayType(float32, scalar)>\n", | |
"9.99999974738e-06\n" | |
] | |
}, | |
{ | |
"ename": "AttributeError", | |
"evalue": "'float' object has no attribute 'get_value'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-30-26351f4bb067>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.001\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m: 'float' object has no attribute 'get_value'" | |
] | |
} | |
], | |
"source": [ | |
"print model.optimizer.lr\n", | |
"print model.optimizer.lr.get_value()\n", | |
"model.optimizer.lr = 0.001\n", | |
"print model.optimizer.lr.get_value()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Interesting! It starts out as a CudaNdarray scalar equal to `0.000009999...` (i.e. `1e-5`). When we then replace with the new learning rate, we can't use `.get_value()` anymore since `lr` is a float." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.001\n" | |
] | |
} | |
], | |
"source": [ | |
"print model.optimizer.lr" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Ok, no big deal. Let's run the next few epochs with the new learning rate" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/4\n", | |
"1500/1500 [==============================] - 25s - loss: 1.2876 - acc: 0.6360 - val_loss: 2.5079 - val_acc: 0.2160\n", | |
"Epoch 2/4\n", | |
"1500/1500 [==============================] - 20s - loss: 1.0604 - acc: 0.7273 - val_loss: 2.5639 - val_acc: 0.2360\n", | |
"Epoch 3/4\n", | |
"1500/1500 [==============================] - 19s - loss: 0.8822 - acc: 0.8073 - val_loss: 2.2749 - val_acc: 0.2640\n", | |
"Epoch 4/4\n", | |
"1500/1500 [==============================] - 19s - loss: 0.7658 - acc: 0.8387 - val_loss: 2.2044 - val_acc: 0.3080\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7fd156713bd0>" | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches, \n", | |
" nb_val_samples=val_batches.nb_sample)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Cool - 83% accuracy, with 30% validation accuracy. Let's see what happens with `.set_value`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/2\n", | |
"1500/1500 [==============================] - 25s - loss: 2.4243 - acc: 0.1900 - val_loss: 5.7273 - val_acc: 0.1780\n", | |
"Epoch 2/2\n", | |
"1500/1500 [==============================] - 19s - loss: 1.7234 - acc: 0.4207 - val_loss: 3.5982 - val_acc: 0.1760\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7fd1550d1a90>" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = Sequential([\n", | |
" BatchNormalization(axis=1, input_shape=(3,224,224)),\n", | |
" Flatten(),\n", | |
" Dense(10, activation='softmax')\n", | |
" ])\n", | |
"model.compile(Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", | |
"model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, \n", | |
" nb_val_samples=val_batches.nb_sample)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<CudaNdarrayType(float32, scalar)>\n", | |
"9.99999974738e-06\n", | |
"0.0010000000475\n" | |
] | |
} | |
], | |
"source": [ | |
"print model.optimizer.lr\n", | |
"print model.optimizer.lr.get_value()\n", | |
"model.optimizer.lr.set_value(0.001)\n", | |
"print model.optimizer.lr.get_value()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/4\n", | |
"1500/1500 [==============================] - 25s - loss: 12.0653 - acc: 0.2060 - val_loss: 14.2787 - val_acc: 0.1040\n", | |
"Epoch 2/4\n", | |
"1500/1500 [==============================] - 19s - loss: 12.5922 - acc: 0.2113 - val_loss: 13.8512 - val_acc: 0.1200\n", | |
"Epoch 3/4\n", | |
"1500/1500 [==============================] - 19s - loss: 12.6270 - acc: 0.2080 - val_loss: 14.0240 - val_acc: 0.1160\n", | |
"Epoch 4/4\n", | |
"1500/1500 [==============================] - 19s - loss: 12.3764 - acc: 0.2260 - val_loss: 12.7715 - val_acc: 0.1960\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7fd154d7a050>" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches, \n", | |
" nb_val_samples=val_batches.nb_sample)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"And so we end up with 22% accuracy and 19% validation accuracy. \n", | |
    "With `lr = 0.001` set via plain assignment (`model.optimizer.lr = 0.001`), we had 83% accuracy and 30% validation accuracy." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Now for a control group!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/6\n", | |
"1500/1500 [==============================] - 25s - loss: 2.3110 - acc: 0.2273 - val_loss: 4.7475 - val_acc: 0.1480\n", | |
"Epoch 2/6\n", | |
"1500/1500 [==============================] - 23s - loss: 1.6814 - acc: 0.4580 - val_loss: 3.6211 - val_acc: 0.1640\n", | |
"Epoch 3/6\n", | |
"1500/1500 [==============================] - 20s - loss: 1.2803 - acc: 0.6320 - val_loss: 2.9282 - val_acc: 0.1740\n", | |
"Epoch 4/6\n", | |
"1500/1500 [==============================] - 20s - loss: 1.0394 - acc: 0.7387 - val_loss: 2.4401 - val_acc: 0.2000\n", | |
"Epoch 5/6\n", | |
"1500/1500 [==============================] - 19s - loss: 0.8947 - acc: 0.7940 - val_loss: 2.3736 - val_acc: 0.2120\n", | |
"Epoch 6/6\n", | |
"1500/1500 [==============================] - 20s - loss: 0.7411 - acc: 0.8493 - val_loss: 2.2444 - val_acc: 0.2800\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7fd153d47050>" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = Sequential([\n", | |
" BatchNormalization(axis=1, input_shape=(3,224,224)),\n", | |
" Flatten(),\n", | |
" Dense(10, activation='softmax')\n", | |
" ])\n", | |
"model.compile(Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", | |
"model.fit_generator(batches, batches.nb_sample, nb_epoch=6, validation_data=val_batches, \n", | |
" nb_val_samples=val_batches.nb_sample)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"With no change to the learning rate, we get 85% accuracy and 28% validation accuracy. That's pretty close to the results from Jeremy's first model adjusting the learning rate via `model.optimizer.lr=0.001`" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
    "So I would bet that the `model.optimizer.lr = ...` assignment doesn't actually affect training: it replaces the optimizer's shared variable with a plain Python float (which is why `.get_value()` stopped working above), while the already-compiled training function keeps using the original shared variable — still holding `1e-5`. That matches the control run, which got nearly identical results with no change at all. Only `.set_value()` updates the value the training function actually reads." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`equals : 83% acc 30% val_acc` \n", | |
"`set_value : 22% acc 19% val_acc` \n", | |
"`no change : 85% acc 28% val_acc`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda root]", | |
"language": "python", | |
"name": "conda-root-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment