Created
February 23, 2018 15:23
-
-
Save carlos-aguayo/7786f6150ee6702a7416d5dd4e925fb9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", | |
" from ._conv import register_converters as _register_converters\n", | |
"Using TensorFlow backend.\n", | |
"/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/matplotlib/__init__.py:962: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #2\n", | |
" (fname, cnt))\n", | |
"/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/matplotlib/__init__.py:962: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #3\n", | |
" (fname, cnt))\n" | |
] | |
} | |
], | |
"source": [ | |
"'''This script goes along the blog post\n", | |
"\"Building powerful image classification models using very little data\"\n", | |
"from blog.keras.io.\n", | |
"It uses data that can be downloaded at:\n", | |
"https://www.kaggle.com/c/dogs-vs-cats/data\n", | |
"In our setup, we:\n", | |
"- created a data/ folder\n", | |
"- created train/ and validation/ subfolders inside data/\n", | |
"- created cats/ and dogs/ subfolders inside train/ and validation/\n", | |
"- put the cat pictures index 0-999 in data/train/cats\n", | |
"- put the cat pictures index 1000-1400 in data/validation/cats\n", | |
"- put the dogs pictures index 12500-13499 in data/train/dogs\n", | |
"- put the dog pictures index 13500-13900 in data/validation/dogs\n", | |
"So that we have 1000 training examples for each class, and 400 validation examples for each class.\n", | |
"In summary, this is our directory structure:\n", | |
"```\n", | |
"data/\n", | |
" train/\n", | |
" dogs/\n", | |
" dog001.jpg\n", | |
" dog002.jpg\n", | |
" ...\n", | |
" cats/\n", | |
" cat001.jpg\n", | |
" cat002.jpg\n", | |
" ...\n", | |
" validation/\n", | |
" dogs/\n", | |
" dog001.jpg\n", | |
" dog002.jpg\n", | |
" ...\n", | |
" cats/\n", | |
" cat001.jpg\n", | |
" cat002.jpg\n", | |
" ...\n", | |
"```\n", | |
"'''\n", | |
"import numpy as np\n", | |
"from keras.preprocessing.image import ImageDataGenerator\n", | |
"from keras.models import Sequential\n", | |
"from keras.layers import Dropout, Flatten, Dense\n", | |
"from keras import applications\n", | |
"from keras.applications import VGG16\n", | |
"\n", | |
"# dimensions of our images.\n", | |
"img_width, img_height = 150, 150\n", | |
"\n", | |
"top_model_weights_path = 'bottleneck_fc_model.h5'\n", | |
"train_data_dir = 'data/train'\n", | |
"validation_data_dir = 'data/validation'\n", | |
"nb_train_samples = 2000\n", | |
"nb_validation_samples = 800\n", | |
"epochs = 50\n", | |
"batch_size = 16" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"input_2 (InputLayer) (None, None, None, 3) 0 \n", | |
"_________________________________________________________________\n", | |
"block1_conv1 (Conv2D) (None, None, None, 64) 1792 \n", | |
"_________________________________________________________________\n", | |
"block1_conv2 (Conv2D) (None, None, None, 64) 36928 \n", | |
"_________________________________________________________________\n", | |
"block1_pool (MaxPooling2D) (None, None, None, 64) 0 \n", | |
"_________________________________________________________________\n", | |
"block2_conv1 (Conv2D) (None, None, None, 128) 73856 \n", | |
"_________________________________________________________________\n", | |
"block2_conv2 (Conv2D) (None, None, None, 128) 147584 \n", | |
"_________________________________________________________________\n", | |
"block2_pool (MaxPooling2D) (None, None, None, 128) 0 \n", | |
"_________________________________________________________________\n", | |
"block3_conv1 (Conv2D) (None, None, None, 256) 295168 \n", | |
"_________________________________________________________________\n", | |
"block3_conv2 (Conv2D) (None, None, None, 256) 590080 \n", | |
"_________________________________________________________________\n", | |
"block3_conv3 (Conv2D) (None, None, None, 256) 590080 \n", | |
"_________________________________________________________________\n", | |
"block3_pool (MaxPooling2D) (None, None, None, 256) 0 \n", | |
"_________________________________________________________________\n", | |
"block4_conv1 (Conv2D) (None, None, None, 512) 1180160 \n", | |
"_________________________________________________________________\n", | |
"block4_conv2 (Conv2D) (None, None, None, 512) 2359808 \n", | |
"_________________________________________________________________\n", | |
"block4_conv3 (Conv2D) (None, None, None, 512) 2359808 \n", | |
"_________________________________________________________________\n", | |
"block4_pool (MaxPooling2D) (None, None, None, 512) 0 \n", | |
"_________________________________________________________________\n", | |
"block5_conv1 (Conv2D) (None, None, None, 512) 2359808 \n", | |
"_________________________________________________________________\n", | |
"block5_conv2 (Conv2D) (None, None, None, 512) 2359808 \n", | |
"_________________________________________________________________\n", | |
"block5_conv3 (Conv2D) (None, None, None, 512) 2359808 \n", | |
"_________________________________________________________________\n", | |
"block5_pool (MaxPooling2D) (None, None, None, 512) 0 \n", | |
"=================================================================\n", | |
"Total params: 14,714,688\n", | |
"Trainable params: 14,714,688\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n" | |
] | |
} | |
], | |
"source": [ | |
"datagen = ImageDataGenerator(rescale=1. / 255)\n", | |
"model = VGG16(include_top=False, weights='imagenet')\n", | |
"print model.summary()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**PART 1 - Generate the output for the Convolutional Neural Network**\n", | |
"\n", | |
"Notice that we are saving the output from predict_generator into \"bottleneck_features_train.npy\" and \"bottleneck_features_validation.npy\". " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"generator = datagen.flow_from_directory(\n", | |
" train_data_dir,\n", | |
" target_size=(img_width, img_height),\n", | |
" batch_size=batch_size,\n", | |
" class_mode=None,\n", | |
" shuffle=False)\n", | |
"\n", | |
"bottleneck_features_train = model.predict_generator(generator, nb_train_samples // batch_size)\n", | |
"np.save(open('bottleneck_features_train.npy', 'w'), bottleneck_features_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"generator = datagen.flow_from_directory(\n", | |
" validation_data_dir,\n", | |
" target_size=(img_width, img_height),\n", | |
" batch_size=batch_size,\n", | |
" class_mode=None,\n", | |
" shuffle=False)\n", | |
"\n", | |
"bottleneck_features_validation = model.predict_generator(generator, nb_validation_samples // batch_size)\n", | |
"np.save(open('bottleneck_features_validation.npy', 'w'), bottleneck_features_validation)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**PART 2 - Train the fully connected network**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(2000, 4, 4, 512)\n", | |
"(800, 4, 4, 512)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Train top model\n", | |
"train_data = np.load(open('bottleneck_features_train.npy'))\n", | |
"train_labels = np.array([0] * (nb_train_samples / 2) + [1] * (nb_train_samples / 2))\n", | |
"\n", | |
"validation_data = np.load(open('bottleneck_features_validation.npy'))\n", | |
"validation_labels = np.array([0] * (nb_validation_samples / 2) + [1] * (nb_validation_samples / 2))\n", | |
"\n", | |
"print train_data.shape\n", | |
"print validation_data.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Train on 2000 samples, validate on 800 samples\n", | |
"Epoch 1/50\n", | |
"2000/2000 [==============================] - 1s 593us/step - loss: 0.6727 - acc: 0.7705 - val_loss: 0.2750 - val_acc: 0.8912\n", | |
"Epoch 2/50\n", | |
"2000/2000 [==============================] - 1s 444us/step - loss: 0.3633 - acc: 0.8520 - val_loss: 0.2807 - val_acc: 0.8812\n", | |
"Epoch 3/50\n", | |
"2000/2000 [==============================] - 1s 444us/step - loss: 0.2972 - acc: 0.8795 - val_loss: 0.2415 - val_acc: 0.9087\n", | |
"Epoch 4/50\n", | |
"2000/2000 [==============================] - 1s 440us/step - loss: 0.2607 - acc: 0.8970 - val_loss: 0.2614 - val_acc: 0.8988\n", | |
"Epoch 5/50\n", | |
"2000/2000 [==============================] - 1s 497us/step - loss: 0.2137 - acc: 0.9190 - val_loss: 0.4860 - val_acc: 0.8237\n", | |
"Epoch 6/50\n", | |
"2000/2000 [==============================] - 1s 487us/step - loss: 0.2084 - acc: 0.9260 - val_loss: 0.3062 - val_acc: 0.8950\n", | |
"Epoch 7/50\n", | |
"2000/2000 [==============================] - 1s 461us/step - loss: 0.1931 - acc: 0.9315 - val_loss: 0.3913 - val_acc: 0.8650\n", | |
"Epoch 8/50\n", | |
"2000/2000 [==============================] - 1s 452us/step - loss: 0.1534 - acc: 0.9380 - val_loss: 0.3031 - val_acc: 0.9050\n", | |
"Epoch 9/50\n", | |
"2000/2000 [==============================] - 1s 432us/step - loss: 0.1524 - acc: 0.9470 - val_loss: 0.3807 - val_acc: 0.8862\n", | |
"Epoch 10/50\n", | |
"2000/2000 [==============================] - 1s 431us/step - loss: 0.1234 - acc: 0.9495 - val_loss: 0.4787 - val_acc: 0.8675\n", | |
"Epoch 11/50\n", | |
"2000/2000 [==============================] - 1s 430us/step - loss: 0.1097 - acc: 0.9580 - val_loss: 0.3980 - val_acc: 0.8925\n", | |
"Epoch 12/50\n", | |
"2000/2000 [==============================] - 1s 427us/step - loss: 0.1118 - acc: 0.9570 - val_loss: 0.4078 - val_acc: 0.9000\n", | |
"Epoch 13/50\n", | |
"2000/2000 [==============================] - 1s 427us/step - loss: 0.0819 - acc: 0.9680 - val_loss: 0.4123 - val_acc: 0.8938\n", | |
"Epoch 14/50\n", | |
"2000/2000 [==============================] - 1s 434us/step - loss: 0.0790 - acc: 0.9695 - val_loss: 0.4803 - val_acc: 0.8975\n", | |
"Epoch 15/50\n", | |
"2000/2000 [==============================] - 1s 437us/step - loss: 0.0721 - acc: 0.9720 - val_loss: 0.4371 - val_acc: 0.9038\n", | |
"Epoch 16/50\n", | |
"2000/2000 [==============================] - 1s 432us/step - loss: 0.0712 - acc: 0.9715 - val_loss: 0.5592 - val_acc: 0.9000\n", | |
"Epoch 17/50\n", | |
"2000/2000 [==============================] - 1s 442us/step - loss: 0.0598 - acc: 0.9765 - val_loss: 0.5341 - val_acc: 0.8950\n", | |
"Epoch 18/50\n", | |
"2000/2000 [==============================] - 1s 424us/step - loss: 0.0549 - acc: 0.9825 - val_loss: 0.7652 - val_acc: 0.8675\n", | |
"Epoch 19/50\n", | |
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0461 - acc: 0.9820 - val_loss: 0.4808 - val_acc: 0.9025\n", | |
"Epoch 20/50\n", | |
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0427 - acc: 0.9845 - val_loss: 0.5831 - val_acc: 0.8988\n", | |
"Epoch 21/50\n", | |
"2000/2000 [==============================] - 1s 428us/step - loss: 0.0324 - acc: 0.9890 - val_loss: 0.5708 - val_acc: 0.8962\n", | |
"Epoch 22/50\n", | |
"2000/2000 [==============================] - 1s 430us/step - loss: 0.0325 - acc: 0.9890 - val_loss: 0.7020 - val_acc: 0.8862\n", | |
"Epoch 23/50\n", | |
"2000/2000 [==============================] - 1s 428us/step - loss: 0.0471 - acc: 0.9845 - val_loss: 0.6452 - val_acc: 0.8875\n", | |
"Epoch 24/50\n", | |
"2000/2000 [==============================] - 1s 428us/step - loss: 0.0187 - acc: 0.9930 - val_loss: 0.8209 - val_acc: 0.8888\n", | |
"Epoch 25/50\n", | |
"2000/2000 [==============================] - 1s 433us/step - loss: 0.0305 - acc: 0.9885 - val_loss: 0.6564 - val_acc: 0.8925\n", | |
"Epoch 26/50\n", | |
"2000/2000 [==============================] - 1s 449us/step - loss: 0.0225 - acc: 0.9925 - val_loss: 0.6778 - val_acc: 0.8962\n", | |
"Epoch 27/50\n", | |
"2000/2000 [==============================] - 1s 430us/step - loss: 0.0227 - acc: 0.9925 - val_loss: 0.7210 - val_acc: 0.8900\n", | |
"Epoch 28/50\n", | |
"2000/2000 [==============================] - 1s 442us/step - loss: 0.0289 - acc: 0.9880 - val_loss: 0.6965 - val_acc: 0.8975\n", | |
"Epoch 29/50\n", | |
"2000/2000 [==============================] - 1s 444us/step - loss: 0.0329 - acc: 0.9905 - val_loss: 0.7614 - val_acc: 0.8962\n", | |
"Epoch 30/50\n", | |
"2000/2000 [==============================] - 1s 445us/step - loss: 0.0185 - acc: 0.9930 - val_loss: 0.7955 - val_acc: 0.9000\n", | |
"Epoch 31/50\n", | |
"2000/2000 [==============================] - 1s 442us/step - loss: 0.0294 - acc: 0.9915 - val_loss: 0.7204 - val_acc: 0.8975\n", | |
"Epoch 32/50\n", | |
"2000/2000 [==============================] - 1s 436us/step - loss: 0.0277 - acc: 0.9915 - val_loss: 0.7362 - val_acc: 0.8988\n", | |
"Epoch 33/50\n", | |
"2000/2000 [==============================] - 1s 441us/step - loss: 0.0252 - acc: 0.9925 - val_loss: 0.7711 - val_acc: 0.9012\n", | |
"Epoch 34/50\n", | |
"2000/2000 [==============================] - 1s 424us/step - loss: 0.0191 - acc: 0.9920 - val_loss: 0.8754 - val_acc: 0.8938\n", | |
"Epoch 35/50\n", | |
"2000/2000 [==============================] - 1s 451us/step - loss: 0.0219 - acc: 0.9945 - val_loss: 0.8236 - val_acc: 0.9000\n", | |
"Epoch 36/50\n", | |
"2000/2000 [==============================] - 1s 441us/step - loss: 0.0216 - acc: 0.9925 - val_loss: 0.7772 - val_acc: 0.8950\n", | |
"Epoch 37/50\n", | |
"2000/2000 [==============================] - 1s 435us/step - loss: 0.0130 - acc: 0.9960 - val_loss: 0.8687 - val_acc: 0.8900\n", | |
"Epoch 38/50\n", | |
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0361 - acc: 0.9920 - val_loss: 0.8328 - val_acc: 0.9012\n", | |
"Epoch 39/50\n", | |
"2000/2000 [==============================] - 1s 446us/step - loss: 0.0128 - acc: 0.9960 - val_loss: 1.0353 - val_acc: 0.8825\n", | |
"Epoch 40/50\n", | |
"2000/2000 [==============================] - 1s 435us/step - loss: 0.0074 - acc: 0.9975 - val_loss: 0.9737 - val_acc: 0.8912\n", | |
"Epoch 41/50\n", | |
"2000/2000 [==============================] - 1s 435us/step - loss: 0.0139 - acc: 0.9965 - val_loss: 0.9904 - val_acc: 0.8850\n", | |
"Epoch 42/50\n", | |
"2000/2000 [==============================] - 1s 437us/step - loss: 0.0318 - acc: 0.9925 - val_loss: 0.9131 - val_acc: 0.9038\n", | |
"Epoch 43/50\n", | |
"2000/2000 [==============================] - 1s 449us/step - loss: 0.0219 - acc: 0.9930 - val_loss: 0.7781 - val_acc: 0.9000\n", | |
"Epoch 44/50\n", | |
"2000/2000 [==============================] - 1s 457us/step - loss: 0.0145 - acc: 0.9965 - val_loss: 0.8813 - val_acc: 0.8975\n", | |
"Epoch 45/50\n", | |
"2000/2000 [==============================] - 1s 440us/step - loss: 0.0187 - acc: 0.9945 - val_loss: 0.9299 - val_acc: 0.8938\n", | |
"Epoch 46/50\n", | |
"2000/2000 [==============================] - 1s 433us/step - loss: 0.0171 - acc: 0.9960 - val_loss: 0.8352 - val_acc: 0.8925\n", | |
"Epoch 47/50\n", | |
"2000/2000 [==============================] - 1s 443us/step - loss: 0.0103 - acc: 0.9960 - val_loss: 0.9185 - val_acc: 0.8962\n", | |
"Epoch 48/50\n", | |
"2000/2000 [==============================] - 1s 439us/step - loss: 0.0035 - acc: 0.9980 - val_loss: 0.9365 - val_acc: 0.9012\n", | |
"Epoch 49/50\n", | |
"2000/2000 [==============================] - 1s 440us/step - loss: 0.0079 - acc: 0.9975 - val_loss: 0.9359 - val_acc: 0.8950\n", | |
"Epoch 50/50\n", | |
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0148 - acc: 0.9960 - val_loss: 0.9338 - val_acc: 0.8975\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f1d0ada7bd0>" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = Sequential()\n", | |
"model.add(Flatten(input_shape=train_data.shape[1:]))\n", | |
"model.add(Dense(256, activation='relu'))\n", | |
"model.add(Dropout(0.5))\n", | |
"model.add(Dense(1, activation='sigmoid'))\n", | |
"\n", | |
"model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])\n", | |
"\n", | |
"model.fit(train_data, \n", | |
" train_labels,\n", | |
" epochs=epochs,\n", | |
" batch_size=batch_size,\n", | |
" validation_data=(validation_data, validation_labels))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**By transfering learning from VGG16, after 50 epochs, we have gotten an accuracy of ~89% in the validation set.**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"model.save_weights(top_model_weights_path)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Environment (conda_tensorflow_p27)", | |
"language": "python", | |
"name": "conda_tensorflow_p27" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.14" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment