Skip to content

Instantly share code, notes, and snippets.

@carlos-aguayo
Created February 23, 2018 15:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carlos-aguayo/7786f6150ee6702a7416d5dd4e925fb9 to your computer and use it in GitHub Desktop.
Save carlos-aguayo/7786f6150ee6702a7416d5dd4e925fb9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n",
"/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/matplotlib/__init__.py:962: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #2\n",
" (fname, cnt))\n",
"/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/matplotlib/__init__.py:962: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #3\n",
" (fname, cnt))\n"
]
}
],
"source": [
"'''This script goes along the blog post\n",
"\"Building powerful image classification models using very little data\"\n",
"from blog.keras.io.\n",
"It uses data that can be downloaded at:\n",
"https://www.kaggle.com/c/dogs-vs-cats/data\n",
"In our setup, we:\n",
"- created a data/ folder\n",
"- created train/ and validation/ subfolders inside data/\n",
"- created cats/ and dogs/ subfolders inside train/ and validation/\n",
"- put the cat pictures index 0-999 in data/train/cats\n",
"- put the cat pictures index 1000-1400 in data/validation/cats\n",
"- put the dogs pictures index 12500-13499 in data/train/dogs\n",
"- put the dog pictures index 13500-13900 in data/validation/dogs\n",
"So that we have 1000 training examples for each class, and 400 validation examples for each class.\n",
"In summary, this is our directory structure:\n",
"```\n",
"data/\n",
" train/\n",
" dogs/\n",
" dog001.jpg\n",
" dog002.jpg\n",
" ...\n",
" cats/\n",
" cat001.jpg\n",
" cat002.jpg\n",
" ...\n",
" validation/\n",
" dogs/\n",
" dog001.jpg\n",
" dog002.jpg\n",
" ...\n",
" cats/\n",
" cat001.jpg\n",
" cat002.jpg\n",
" ...\n",
"```\n",
"'''\n",
"import numpy as np\n",
"from keras.preprocessing.image import ImageDataGenerator\n",
"from keras.models import Sequential\n",
"from keras.layers import Dropout, Flatten, Dense\n",
"from keras import applications\n",
"from keras.applications import VGG16\n",
"\n",
"# dimensions of our images.\n",
"img_width, img_height = 150, 150\n",
"\n",
"top_model_weights_path = 'bottleneck_fc_model.h5'\n",
"train_data_dir = 'data/train'\n",
"validation_data_dir = 'data/validation'\n",
"nb_train_samples = 2000\n",
"nb_validation_samples = 800\n",
"epochs = 50\n",
"batch_size = 16"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input_2 (InputLayer) (None, None, None, 3) 0 \n",
"_________________________________________________________________\n",
"block1_conv1 (Conv2D) (None, None, None, 64) 1792 \n",
"_________________________________________________________________\n",
"block1_conv2 (Conv2D) (None, None, None, 64) 36928 \n",
"_________________________________________________________________\n",
"block1_pool (MaxPooling2D) (None, None, None, 64) 0 \n",
"_________________________________________________________________\n",
"block2_conv1 (Conv2D) (None, None, None, 128) 73856 \n",
"_________________________________________________________________\n",
"block2_conv2 (Conv2D) (None, None, None, 128) 147584 \n",
"_________________________________________________________________\n",
"block2_pool (MaxPooling2D) (None, None, None, 128) 0 \n",
"_________________________________________________________________\n",
"block3_conv1 (Conv2D) (None, None, None, 256) 295168 \n",
"_________________________________________________________________\n",
"block3_conv2 (Conv2D) (None, None, None, 256) 590080 \n",
"_________________________________________________________________\n",
"block3_conv3 (Conv2D) (None, None, None, 256) 590080 \n",
"_________________________________________________________________\n",
"block3_pool (MaxPooling2D) (None, None, None, 256) 0 \n",
"_________________________________________________________________\n",
"block4_conv1 (Conv2D) (None, None, None, 512) 1180160 \n",
"_________________________________________________________________\n",
"block4_conv2 (Conv2D) (None, None, None, 512) 2359808 \n",
"_________________________________________________________________\n",
"block4_conv3 (Conv2D) (None, None, None, 512) 2359808 \n",
"_________________________________________________________________\n",
"block4_pool (MaxPooling2D) (None, None, None, 512) 0 \n",
"_________________________________________________________________\n",
"block5_conv1 (Conv2D) (None, None, None, 512) 2359808 \n",
"_________________________________________________________________\n",
"block5_conv2 (Conv2D) (None, None, None, 512) 2359808 \n",
"_________________________________________________________________\n",
"block5_conv3 (Conv2D) (None, None, None, 512) 2359808 \n",
"_________________________________________________________________\n",
"block5_pool (MaxPooling2D) (None, None, None, 512) 0 \n",
"=================================================================\n",
"Total params: 14,714,688\n",
"Trainable params: 14,714,688\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"None\n"
]
}
],
"source": [
"datagen = ImageDataGenerator(rescale=1. / 255)\n",
"model = VGG16(include_top=False, weights='imagenet')\n",
"print model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**PART 1 - Generate the output for the Convolutional Neural Network**\n",
"\n",
"Notice that we are saving the output from predict_generator into \"bottleneck_features_train.npy\" and \"bottleneck_features_validation.npy\". "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"generator = datagen.flow_from_directory(\n",
" train_data_dir,\n",
" target_size=(img_width, img_height),\n",
" batch_size=batch_size,\n",
" class_mode=None,\n",
" shuffle=False)\n",
"\n",
"bottleneck_features_train = model.predict_generator(generator, nb_train_samples // batch_size)\n",
"np.save(open('bottleneck_features_train.npy', 'w'), bottleneck_features_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"generator = datagen.flow_from_directory(\n",
" validation_data_dir,\n",
" target_size=(img_width, img_height),\n",
" batch_size=batch_size,\n",
" class_mode=None,\n",
" shuffle=False)\n",
"\n",
"bottleneck_features_validation = model.predict_generator(generator, nb_validation_samples // batch_size)\n",
"np.save(open('bottleneck_features_validation.npy', 'w'), bottleneck_features_validation)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**PART 2 - Train the fully connected network**"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2000, 4, 4, 512)\n",
"(800, 4, 4, 512)\n"
]
}
],
"source": [
"# Train top model\n",
"train_data = np.load(open('bottleneck_features_train.npy'))\n",
"train_labels = np.array([0] * (nb_train_samples / 2) + [1] * (nb_train_samples / 2))\n",
"\n",
"validation_data = np.load(open('bottleneck_features_validation.npy'))\n",
"validation_labels = np.array([0] * (nb_validation_samples / 2) + [1] * (nb_validation_samples / 2))\n",
"\n",
"print train_data.shape\n",
"print validation_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 2000 samples, validate on 800 samples\n",
"Epoch 1/50\n",
"2000/2000 [==============================] - 1s 593us/step - loss: 0.6727 - acc: 0.7705 - val_loss: 0.2750 - val_acc: 0.8912\n",
"Epoch 2/50\n",
"2000/2000 [==============================] - 1s 444us/step - loss: 0.3633 - acc: 0.8520 - val_loss: 0.2807 - val_acc: 0.8812\n",
"Epoch 3/50\n",
"2000/2000 [==============================] - 1s 444us/step - loss: 0.2972 - acc: 0.8795 - val_loss: 0.2415 - val_acc: 0.9087\n",
"Epoch 4/50\n",
"2000/2000 [==============================] - 1s 440us/step - loss: 0.2607 - acc: 0.8970 - val_loss: 0.2614 - val_acc: 0.8988\n",
"Epoch 5/50\n",
"2000/2000 [==============================] - 1s 497us/step - loss: 0.2137 - acc: 0.9190 - val_loss: 0.4860 - val_acc: 0.8237\n",
"Epoch 6/50\n",
"2000/2000 [==============================] - 1s 487us/step - loss: 0.2084 - acc: 0.9260 - val_loss: 0.3062 - val_acc: 0.8950\n",
"Epoch 7/50\n",
"2000/2000 [==============================] - 1s 461us/step - loss: 0.1931 - acc: 0.9315 - val_loss: 0.3913 - val_acc: 0.8650\n",
"Epoch 8/50\n",
"2000/2000 [==============================] - 1s 452us/step - loss: 0.1534 - acc: 0.9380 - val_loss: 0.3031 - val_acc: 0.9050\n",
"Epoch 9/50\n",
"2000/2000 [==============================] - 1s 432us/step - loss: 0.1524 - acc: 0.9470 - val_loss: 0.3807 - val_acc: 0.8862\n",
"Epoch 10/50\n",
"2000/2000 [==============================] - 1s 431us/step - loss: 0.1234 - acc: 0.9495 - val_loss: 0.4787 - val_acc: 0.8675\n",
"Epoch 11/50\n",
"2000/2000 [==============================] - 1s 430us/step - loss: 0.1097 - acc: 0.9580 - val_loss: 0.3980 - val_acc: 0.8925\n",
"Epoch 12/50\n",
"2000/2000 [==============================] - 1s 427us/step - loss: 0.1118 - acc: 0.9570 - val_loss: 0.4078 - val_acc: 0.9000\n",
"Epoch 13/50\n",
"2000/2000 [==============================] - 1s 427us/step - loss: 0.0819 - acc: 0.9680 - val_loss: 0.4123 - val_acc: 0.8938\n",
"Epoch 14/50\n",
"2000/2000 [==============================] - 1s 434us/step - loss: 0.0790 - acc: 0.9695 - val_loss: 0.4803 - val_acc: 0.8975\n",
"Epoch 15/50\n",
"2000/2000 [==============================] - 1s 437us/step - loss: 0.0721 - acc: 0.9720 - val_loss: 0.4371 - val_acc: 0.9038\n",
"Epoch 16/50\n",
"2000/2000 [==============================] - 1s 432us/step - loss: 0.0712 - acc: 0.9715 - val_loss: 0.5592 - val_acc: 0.9000\n",
"Epoch 17/50\n",
"2000/2000 [==============================] - 1s 442us/step - loss: 0.0598 - acc: 0.9765 - val_loss: 0.5341 - val_acc: 0.8950\n",
"Epoch 18/50\n",
"2000/2000 [==============================] - 1s 424us/step - loss: 0.0549 - acc: 0.9825 - val_loss: 0.7652 - val_acc: 0.8675\n",
"Epoch 19/50\n",
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0461 - acc: 0.9820 - val_loss: 0.4808 - val_acc: 0.9025\n",
"Epoch 20/50\n",
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0427 - acc: 0.9845 - val_loss: 0.5831 - val_acc: 0.8988\n",
"Epoch 21/50\n",
"2000/2000 [==============================] - 1s 428us/step - loss: 0.0324 - acc: 0.9890 - val_loss: 0.5708 - val_acc: 0.8962\n",
"Epoch 22/50\n",
"2000/2000 [==============================] - 1s 430us/step - loss: 0.0325 - acc: 0.9890 - val_loss: 0.7020 - val_acc: 0.8862\n",
"Epoch 23/50\n",
"2000/2000 [==============================] - 1s 428us/step - loss: 0.0471 - acc: 0.9845 - val_loss: 0.6452 - val_acc: 0.8875\n",
"Epoch 24/50\n",
"2000/2000 [==============================] - 1s 428us/step - loss: 0.0187 - acc: 0.9930 - val_loss: 0.8209 - val_acc: 0.8888\n",
"Epoch 25/50\n",
"2000/2000 [==============================] - 1s 433us/step - loss: 0.0305 - acc: 0.9885 - val_loss: 0.6564 - val_acc: 0.8925\n",
"Epoch 26/50\n",
"2000/2000 [==============================] - 1s 449us/step - loss: 0.0225 - acc: 0.9925 - val_loss: 0.6778 - val_acc: 0.8962\n",
"Epoch 27/50\n",
"2000/2000 [==============================] - 1s 430us/step - loss: 0.0227 - acc: 0.9925 - val_loss: 0.7210 - val_acc: 0.8900\n",
"Epoch 28/50\n",
"2000/2000 [==============================] - 1s 442us/step - loss: 0.0289 - acc: 0.9880 - val_loss: 0.6965 - val_acc: 0.8975\n",
"Epoch 29/50\n",
"2000/2000 [==============================] - 1s 444us/step - loss: 0.0329 - acc: 0.9905 - val_loss: 0.7614 - val_acc: 0.8962\n",
"Epoch 30/50\n",
"2000/2000 [==============================] - 1s 445us/step - loss: 0.0185 - acc: 0.9930 - val_loss: 0.7955 - val_acc: 0.9000\n",
"Epoch 31/50\n",
"2000/2000 [==============================] - 1s 442us/step - loss: 0.0294 - acc: 0.9915 - val_loss: 0.7204 - val_acc: 0.8975\n",
"Epoch 32/50\n",
"2000/2000 [==============================] - 1s 436us/step - loss: 0.0277 - acc: 0.9915 - val_loss: 0.7362 - val_acc: 0.8988\n",
"Epoch 33/50\n",
"2000/2000 [==============================] - 1s 441us/step - loss: 0.0252 - acc: 0.9925 - val_loss: 0.7711 - val_acc: 0.9012\n",
"Epoch 34/50\n",
"2000/2000 [==============================] - 1s 424us/step - loss: 0.0191 - acc: 0.9920 - val_loss: 0.8754 - val_acc: 0.8938\n",
"Epoch 35/50\n",
"2000/2000 [==============================] - 1s 451us/step - loss: 0.0219 - acc: 0.9945 - val_loss: 0.8236 - val_acc: 0.9000\n",
"Epoch 36/50\n",
"2000/2000 [==============================] - 1s 441us/step - loss: 0.0216 - acc: 0.9925 - val_loss: 0.7772 - val_acc: 0.8950\n",
"Epoch 37/50\n",
"2000/2000 [==============================] - 1s 435us/step - loss: 0.0130 - acc: 0.9960 - val_loss: 0.8687 - val_acc: 0.8900\n",
"Epoch 38/50\n",
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0361 - acc: 0.9920 - val_loss: 0.8328 - val_acc: 0.9012\n",
"Epoch 39/50\n",
"2000/2000 [==============================] - 1s 446us/step - loss: 0.0128 - acc: 0.9960 - val_loss: 1.0353 - val_acc: 0.8825\n",
"Epoch 40/50\n",
"2000/2000 [==============================] - 1s 435us/step - loss: 0.0074 - acc: 0.9975 - val_loss: 0.9737 - val_acc: 0.8912\n",
"Epoch 41/50\n",
"2000/2000 [==============================] - 1s 435us/step - loss: 0.0139 - acc: 0.9965 - val_loss: 0.9904 - val_acc: 0.8850\n",
"Epoch 42/50\n",
"2000/2000 [==============================] - 1s 437us/step - loss: 0.0318 - acc: 0.9925 - val_loss: 0.9131 - val_acc: 0.9038\n",
"Epoch 43/50\n",
"2000/2000 [==============================] - 1s 449us/step - loss: 0.0219 - acc: 0.9930 - val_loss: 0.7781 - val_acc: 0.9000\n",
"Epoch 44/50\n",
"2000/2000 [==============================] - 1s 457us/step - loss: 0.0145 - acc: 0.9965 - val_loss: 0.8813 - val_acc: 0.8975\n",
"Epoch 45/50\n",
"2000/2000 [==============================] - 1s 440us/step - loss: 0.0187 - acc: 0.9945 - val_loss: 0.9299 - val_acc: 0.8938\n",
"Epoch 46/50\n",
"2000/2000 [==============================] - 1s 433us/step - loss: 0.0171 - acc: 0.9960 - val_loss: 0.8352 - val_acc: 0.8925\n",
"Epoch 47/50\n",
"2000/2000 [==============================] - 1s 443us/step - loss: 0.0103 - acc: 0.9960 - val_loss: 0.9185 - val_acc: 0.8962\n",
"Epoch 48/50\n",
"2000/2000 [==============================] - 1s 439us/step - loss: 0.0035 - acc: 0.9980 - val_loss: 0.9365 - val_acc: 0.9012\n",
"Epoch 49/50\n",
"2000/2000 [==============================] - 1s 440us/step - loss: 0.0079 - acc: 0.9975 - val_loss: 0.9359 - val_acc: 0.8950\n",
"Epoch 50/50\n",
"2000/2000 [==============================] - 1s 438us/step - loss: 0.0148 - acc: 0.9960 - val_loss: 0.9338 - val_acc: 0.8975\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7f1d0ada7bd0>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = Sequential()\n",
"model.add(Flatten(input_shape=train_data.shape[1:]))\n",
"model.add(Dense(256, activation='relu'))\n",
"model.add(Dropout(0.5))\n",
"model.add(Dense(1, activation='sigmoid'))\n",
"\n",
"model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])\n",
"\n",
"model.fit(train_data, \n",
" train_labels,\n",
" epochs=epochs,\n",
" batch_size=batch_size,\n",
" validation_data=(validation_data, validation_labels))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**By transfering learning from VGG16, after 50 epochs, we have gotten an accuracy of ~89% in the validation set.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.save_weights(top_model_weights_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Environment (conda_tensorflow_p27)",
"language": "python",
"name": "conda_tensorflow_p27"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment