Created
February 23, 2018 15:23
-
-
Save carlos-aguayo/1a84354a9140c2035c04560ee4e53ae8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"'''This script goes along the blog post\n", | |
"\"Building powerful image classification models using very little data\"\n", | |
"from blog.keras.io.\n", | |
"It uses data that can be downloaded at:\n", | |
"https://www.kaggle.com/c/dogs-vs-cats/data\n", | |
"In our setup, we:\n", | |
"- created a data/ folder\n", | |
"- created train/ and validation/ subfolders inside data/\n", | |
"- created cats/ and dogs/ subfolders inside train/ and validation/\n", | |
"- put the cat pictures index 0-999 in data/train/cats\n", | |
"- put the cat pictures index 1000-1400 in data/validation/cats\n", | |
"- put the dogs pictures index 12500-13499 in data/train/dogs\n", | |
"- put the dog pictures index 13500-13900 in data/validation/dogs\n", | |
"So that we have 1000 training examples for each class, and 400 validation examples for each class.\n", | |
"In summary, this is our directory structure:\n", | |
"```\n", | |
"data/\n", | |
" train/\n", | |
" dogs/\n", | |
" dog001.jpg\n", | |
" dog002.jpg\n", | |
" ...\n", | |
" cats/\n", | |
" cat001.jpg\n", | |
" cat002.jpg\n", | |
" ...\n", | |
" validation/\n", | |
" dogs/\n", | |
" dog001.jpg\n", | |
" dog002.jpg\n", | |
" ...\n", | |
" cats/\n", | |
" cat001.jpg\n", | |
" cat002.jpg\n", | |
" ...\n", | |
"```\n", | |
"'''\n", | |
"\n", | |
"from keras.preprocessing.image import ImageDataGenerator\n", | |
"from keras.models import Sequential\n", | |
"from keras.layers import Conv2D, MaxPooling2D\n", | |
"from keras.layers import Activation, Dropout, Flatten, Dense\n", | |
"from keras import backend as K\n", | |
"\n", | |
"# dimensions of our images.\n", | |
"img_width, img_height = 150, 150\n", | |
"\n", | |
"train_data_dir = 'data/train'\n", | |
"validation_data_dir = 'data/validation'\n", | |
"nb_train_samples = 2000\n", | |
"nb_validation_samples = 800\n", | |
"epochs = 50\n", | |
"batch_size = 16\n", | |
"\n", | |
"if K.image_data_format() == 'channels_first':\n", | |
" input_shape = (3, img_width, img_height)\n", | |
"else:\n", | |
" input_shape = (img_width, img_height, 3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"bottleneck_fc_model.h5\n", | |
"bottleneck_features_train.npy\n", | |
"bottleneck_features_validation.npy\n", | |
"classifier_from_little_data_script_1.out.txt\n", | |
"classifier_from_little_data_script_1.py\n", | |
"classifier_from_little_data_script_2.out.txt\n", | |
"classifier_from_little_data_script_2.py\n", | |
"classifier_from_little_data_script_3.out.txt\n", | |
"classifier_from_little_data_script_3.py\n", | |
"data\n", | |
"first_try.h5\n", | |
"launch.sub\n", | |
"README.md\n", | |
"requirements.txt\n", | |
"summaries.py\n", | |
"Untitled.ipynb\n", | |
"vgg16_weights.h5\n", | |
"words\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"from subprocess import check_output\n", | |
"print(check_output([\"ls\", \".\"]).decode(\"utf8\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"conv2d_4 (Conv2D) (None, 148, 148, 32) 896 \n", | |
"_________________________________________________________________\n", | |
"activation_6 (Activation) (None, 148, 148, 32) 0 \n", | |
"_________________________________________________________________\n", | |
"max_pooling2d_4 (MaxPooling2 (None, 74, 74, 32) 0 \n", | |
"_________________________________________________________________\n", | |
"conv2d_5 (Conv2D) (None, 72, 72, 32) 9248 \n", | |
"_________________________________________________________________\n", | |
"activation_7 (Activation) (None, 72, 72, 32) 0 \n", | |
"_________________________________________________________________\n", | |
"max_pooling2d_5 (MaxPooling2 (None, 36, 36, 32) 0 \n", | |
"_________________________________________________________________\n", | |
"conv2d_6 (Conv2D) (None, 34, 34, 64) 18496 \n", | |
"_________________________________________________________________\n", | |
"activation_8 (Activation) (None, 34, 34, 64) 0 \n", | |
"_________________________________________________________________\n", | |
"max_pooling2d_6 (MaxPooling2 (None, 17, 17, 64) 0 \n", | |
"_________________________________________________________________\n", | |
"flatten_2 (Flatten) (None, 18496) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_3 (Dense) (None, 64) 1183808 \n", | |
"_________________________________________________________________\n", | |
"activation_9 (Activation) (None, 64) 0 \n", | |
"_________________________________________________________________\n", | |
"dropout_2 (Dropout) (None, 64) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_4 (Dense) (None, 1) 65 \n", | |
"_________________________________________________________________\n", | |
"activation_10 (Activation) (None, 1) 0 \n", | |
"=================================================================\n", | |
"Total params: 1,212,513\n", | |
"Trainable params: 1,212,513\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n", | |
"None\n" | |
] | |
} | |
], | |
"source": [ | |
"model = Sequential()\n", | |
"model.add(Conv2D(32, (3, 3), input_shape=input_shape))\n", | |
"model.add(Activation('relu'))\n", | |
"model.add(MaxPooling2D(pool_size=(2, 2)))\n", | |
"\n", | |
"model.add(Conv2D(32, (3, 3)))\n", | |
"model.add(Activation('relu'))\n", | |
"model.add(MaxPooling2D(pool_size=(2, 2)))\n", | |
"\n", | |
"model.add(Conv2D(64, (3, 3)))\n", | |
"model.add(Activation('relu'))\n", | |
"model.add(MaxPooling2D(pool_size=(2, 2)))\n", | |
"\n", | |
"model.add(Flatten())\n", | |
"model.add(Dense(64))\n", | |
"model.add(Activation('relu'))\n", | |
"model.add(Dropout(0.5))\n", | |
"model.add(Dense(1))\n", | |
"model.add(Activation('sigmoid'))\n", | |
"\n", | |
"model.compile(loss='binary_crossentropy',\n", | |
" optimizer='rmsprop',\n", | |
" metrics=['accuracy'])\n", | |
"\n", | |
"print model.summary()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Found 2000 images belonging to 2 classes.\n", | |
"Found 800 images belonging to 2 classes.\n" | |
] | |
} | |
], | |
"source": [ | |
"# this is the augmentation configuration we will use for training\n", | |
"train_datagen = ImageDataGenerator(\n", | |
" rescale=1. / 255,\n", | |
" shear_range=0.2,\n", | |
" zoom_range=0.2,\n", | |
" horizontal_flip=True)\n", | |
"\n", | |
"# this is the augmentation configuration we will use for testing:\n", | |
"# only rescaling\n", | |
"test_datagen = ImageDataGenerator(rescale=1. / 255)\n", | |
"\n", | |
"train_generator = train_datagen.flow_from_directory(\n", | |
" train_data_dir,\n", | |
" target_size=(img_width, img_height),\n", | |
" batch_size=batch_size,\n", | |
" class_mode='binary')\n", | |
"\n", | |
"validation_generator = test_datagen.flow_from_directory(\n", | |
" validation_data_dir,\n", | |
" target_size=(img_width, img_height),\n", | |
" batch_size=batch_size,\n", | |
" class_mode='binary')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/50\n", | |
"125/125 [==============================] - 143s 1s/step - loss: 0.6973 - acc: 0.5485 - val_loss: 0.6620 - val_acc: 0.6162\n", | |
"Epoch 2/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.6487 - acc: 0.6330 - val_loss: 0.5987 - val_acc: 0.6837\n", | |
"Epoch 3/50\n", | |
"125/125 [==============================] - 14s 109ms/step - loss: 0.6122 - acc: 0.6830 - val_loss: 0.5921 - val_acc: 0.6837\n", | |
"Epoch 4/50\n", | |
"125/125 [==============================] - 13s 108ms/step - loss: 0.5804 - acc: 0.7030 - val_loss: 0.6243 - val_acc: 0.6750\n", | |
"Epoch 5/50\n", | |
"125/125 [==============================] - 14s 112ms/step - loss: 0.5679 - acc: 0.7115 - val_loss: 0.5546 - val_acc: 0.6875\n", | |
"Epoch 6/50\n", | |
"125/125 [==============================] - 14s 112ms/step - loss: 0.5473 - acc: 0.7270 - val_loss: 0.5235 - val_acc: 0.7462\n", | |
"Epoch 7/50\n", | |
"125/125 [==============================] - 13s 108ms/step - loss: 0.5469 - acc: 0.7395 - val_loss: 0.5655 - val_acc: 0.7163\n", | |
"Epoch 8/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.5220 - acc: 0.7490 - val_loss: 0.5479 - val_acc: 0.7425\n", | |
"Epoch 9/50\n", | |
"125/125 [==============================] - 14s 111ms/step - loss: 0.5195 - acc: 0.7485 - val_loss: 0.5502 - val_acc: 0.7288\n", | |
"Epoch 10/50\n", | |
"125/125 [==============================] - 14s 113ms/step - loss: 0.5051 - acc: 0.7600 - val_loss: 0.5099 - val_acc: 0.7625\n", | |
"Epoch 11/50\n", | |
"125/125 [==============================] - 13s 108ms/step - loss: 0.4935 - acc: 0.7645 - val_loss: 0.5068 - val_acc: 0.7612\n", | |
"Epoch 12/50\n", | |
"125/125 [==============================] - 13s 108ms/step - loss: 0.4940 - acc: 0.7705 - val_loss: 0.5633 - val_acc: 0.7087\n", | |
"Epoch 13/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.4743 - acc: 0.7885 - val_loss: 0.5504 - val_acc: 0.7575\n", | |
"Epoch 14/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.4686 - acc: 0.7920 - val_loss: 0.5001 - val_acc: 0.7525\n", | |
"Epoch 15/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.4563 - acc: 0.7875 - val_loss: 0.5038 - val_acc: 0.7475\n", | |
"Epoch 16/50\n", | |
"125/125 [==============================] - 14s 112ms/step - loss: 0.4485 - acc: 0.7930 - val_loss: 0.5095 - val_acc: 0.7762\n", | |
"Epoch 17/50\n", | |
"125/125 [==============================] - 14s 111ms/step - loss: 0.4422 - acc: 0.7945 - val_loss: 0.5289 - val_acc: 0.7700\n", | |
"Epoch 18/50\n", | |
"125/125 [==============================] - 14s 109ms/step - loss: 0.4316 - acc: 0.8010 - val_loss: 0.5616 - val_acc: 0.7525\n", | |
"Epoch 19/50\n", | |
"125/125 [==============================] - 14s 112ms/step - loss: 0.4221 - acc: 0.8085 - val_loss: 0.4811 - val_acc: 0.7900\n", | |
"Epoch 20/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.4139 - acc: 0.8220 - val_loss: 0.4857 - val_acc: 0.7562\n", | |
"Epoch 21/50\n", | |
"125/125 [==============================] - 14s 109ms/step - loss: 0.4076 - acc: 0.8165 - val_loss: 0.5973 - val_acc: 0.7738\n", | |
"Epoch 22/50\n", | |
"125/125 [==============================] - 13s 108ms/step - loss: 0.4233 - acc: 0.8125 - val_loss: 0.5127 - val_acc: 0.7562\n", | |
"Epoch 23/50\n", | |
"125/125 [==============================] - 13s 105ms/step - loss: 0.4207 - acc: 0.8090 - val_loss: 0.8362 - val_acc: 0.7200\n", | |
"Epoch 24/50\n", | |
"125/125 [==============================] - 13s 108ms/step - loss: 0.4168 - acc: 0.8215 - val_loss: 0.6152 - val_acc: 0.7488\n", | |
"Epoch 25/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.4151 - acc: 0.8145 - val_loss: 0.5806 - val_acc: 0.7425\n", | |
"Epoch 26/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.4306 - acc: 0.8180 - val_loss: 0.4943 - val_acc: 0.7712\n", | |
"Epoch 27/50\n", | |
"125/125 [==============================] - 13s 105ms/step - loss: 0.4193 - acc: 0.8225 - val_loss: 0.5273 - val_acc: 0.7812\n", | |
"Epoch 28/50\n", | |
"125/125 [==============================] - 14s 111ms/step - loss: 0.4260 - acc: 0.8085 - val_loss: 0.4955 - val_acc: 0.7550\n", | |
"Epoch 29/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.3994 - acc: 0.8300 - val_loss: 0.5761 - val_acc: 0.7863\n", | |
"Epoch 30/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.4058 - acc: 0.8280 - val_loss: 0.6078 - val_acc: 0.7638\n", | |
"Epoch 31/50\n", | |
"125/125 [==============================] - 14s 109ms/step - loss: 0.4296 - acc: 0.8200 - val_loss: 0.4906 - val_acc: 0.7738\n", | |
"Epoch 32/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.4246 - acc: 0.8200 - val_loss: 0.5235 - val_acc: 0.7625\n", | |
"Epoch 33/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.4367 - acc: 0.8180 - val_loss: 0.5570 - val_acc: 0.8013\n", | |
"Epoch 34/50\n", | |
"125/125 [==============================] - 14s 108ms/step - loss: 0.4071 - acc: 0.8250 - val_loss: 0.5295 - val_acc: 0.7700\n", | |
"Epoch 35/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.4109 - acc: 0.8310 - val_loss: 0.5368 - val_acc: 0.7913\n", | |
"Epoch 36/50\n", | |
"125/125 [==============================] - 14s 112ms/step - loss: 0.4115 - acc: 0.8255 - val_loss: 0.5052 - val_acc: 0.7775\n", | |
"Epoch 37/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.4084 - acc: 0.8285 - val_loss: 0.6167 - val_acc: 0.7762\n", | |
"Epoch 38/50\n", | |
"125/125 [==============================] - 14s 109ms/step - loss: 0.4020 - acc: 0.8280 - val_loss: 0.5512 - val_acc: 0.7638\n", | |
"Epoch 39/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.4104 - acc: 0.8280 - val_loss: 0.5622 - val_acc: 0.7475\n", | |
"Epoch 40/50\n", | |
"125/125 [==============================] - 14s 112ms/step - loss: 0.4107 - acc: 0.8345 - val_loss: 0.5083 - val_acc: 0.7400\n", | |
"Epoch 41/50\n", | |
"125/125 [==============================] - 13s 104ms/step - loss: 0.3931 - acc: 0.8320 - val_loss: 0.4750 - val_acc: 0.8100\n", | |
"Epoch 42/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.3923 - acc: 0.8420 - val_loss: 0.6980 - val_acc: 0.7913\n", | |
"Epoch 43/50\n", | |
"125/125 [==============================] - 13s 107ms/step - loss: 0.3881 - acc: 0.8390 - val_loss: 0.6334 - val_acc: 0.7975\n", | |
"Epoch 44/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.4019 - acc: 0.8255 - val_loss: 0.6023 - val_acc: 0.7650\n", | |
"Epoch 45/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.3718 - acc: 0.8470 - val_loss: 0.5327 - val_acc: 0.7850\n", | |
"Epoch 46/50\n", | |
"125/125 [==============================] - 14s 109ms/step - loss: 0.3845 - acc: 0.8375 - val_loss: 0.5310 - val_acc: 0.7850\n", | |
"Epoch 47/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.3827 - acc: 0.8435 - val_loss: 0.5491 - val_acc: 0.7913\n", | |
"Epoch 48/50\n", | |
"125/125 [==============================] - 14s 110ms/step - loss: 0.3763 - acc: 0.8420 - val_loss: 0.5367 - val_acc: 0.7937\n", | |
"Epoch 49/50\n", | |
"125/125 [==============================] - 13s 106ms/step - loss: 0.3931 - acc: 0.8370 - val_loss: 0.5441 - val_acc: 0.8000\n", | |
"Epoch 50/50\n", | |
"125/125 [==============================] - 14s 108ms/step - loss: 0.4094 - acc: 0.8325 - val_loss: 0.5628 - val_acc: 0.7825\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f409df04a10>" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.fit_generator(\n", | |
" train_generator,\n", | |
" steps_per_epoch=nb_train_samples // batch_size,\n", | |
" epochs=epochs,\n", | |
" validation_data=validation_generator,\n", | |
" validation_steps=nb_validation_samples // batch_size)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Training a CNN from scratch, after 50 epochs, we have gotten an accuracy of ~78% in the validation set.**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"model.save_weights('first_try.h5')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Environment (conda_tensorflow_p27)", | |
"language": "python", | |
"name": "conda_tensorflow_p27" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.14" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment