Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save kechan/38b4e6c85501246a2a49deadd26aefd7 to your computer and use it in GitHub Desktop.
Save kechan/38b4e6c85501246a2a49deadd26aefd7 to your computer and use it in GitHub Desktop.
Keras Transfer Learning with feature caching.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Keras Transfer Learning with feature caching.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python2",
"display_name": "Python 2"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"[View in Colaboratory](https://colab.research.google.com/gist/kechan/38b4e6c85501246a2a49deadd26aefd7/keras-transfer-learning-with-feature-caching.ipynb)"
]
},
{
"metadata": {
"id": "eZvBWtxiCi8a",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "0f8fc981-94e9-46f9-9280-c1aaa36b7956"
},
"cell_type": "code",
"source": [
"import tensorflow as tf\n",
"\n",
"# Abort early unless TensorFlow reports the expected first GPU device.\n",
"device_name = tf.test.gpu_device_name()\n",
"if device_name == '/device:GPU:0':\n",
"    print('Found GPU at: {}'.format(device_name))\n",
"else:\n",
"    raise SystemError('GPU device not found')"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Found GPU at: /device:GPU:0\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "AvJ_yMjvzf2m",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"outputId": "28c1ee7d-7c56-491a-bd47-acb44edae84a"
},
"cell_type": "code",
"source": [
"ls -l"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"total 12\r\n",
"drwxr-xr-x 2 root root 4096 May 20 18:54 \u001b[0m\u001b[01;34mdata\u001b[0m/\r\n",
"drwxr-xr-x 1 root root 4096 May 20 18:39 \u001b[01;34mdatalab\u001b[0m/\r\n",
"drwxr-xr-x 6 root root 4096 May 20 18:55 \u001b[01;34mKerasVision\u001b[0m/\r\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "QQaQeFlgC_Ug",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"!git clone https://github.com/kechan/KerasVision.git"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "9plAlIdkEIjb",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"!pip install -U -q PyDrive"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "LjkHA3vWEKPN",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"def download_data_from_gdrive(ids, filenames):\n",
"    \"\"\"Download Google Drive files to local paths.\n",
"\n",
"    Uses the notebook-level `drive` client, which must exist before this is called.\n",
"\n",
"    Args:\n",
"        ids: Google Drive file ids.\n",
"        filenames: local destination filename for each id, in the same order.\n",
"\n",
"    Raises:\n",
"        ValueError: if `ids` and `filenames` differ in length.\n",
"    \"\"\"\n",
"    ids, filenames = list(ids), list(filenames)\n",
"    if len(ids) != len(filenames):\n",
"        # zip() would silently drop the unmatched entries.\n",
"        raise ValueError('ids and filenames must have the same length')\n",
"\n",
"    for file_id, filename in zip(ids, filenames):\n",
"        remote_file = drive.CreateFile({'id': file_id})\n",
"        remote_file.GetContentFile(filename)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "3r5f9icFEMXK",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"from pydrive.auth import GoogleAuth\n",
"from pydrive.drive import GoogleDrive\n",
"from google.colab import auth\n",
"from oauth2client.client import GoogleCredentials\n",
"\n",
"auth.authenticate_user()\n",
"gauth = GoogleAuth()\n",
"gauth.credentials = GoogleCredentials.get_application_default()\n",
"drive = GoogleDrive(gauth)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "NTMJGv6IEVqY",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"download_data_from_gdrive(['1Zdt10Q1Jn-hrq2o1mmvQ1j4DgBTxxGIq', '1FgVh2oGqH9Pr4Ze2NETyLnBTPtC0hTui', '1X6ijkgbWCzATPCJLx0rBCy5jtUkjo2KG'], \n",
" ['train_224_224.hdf5.gz', 'validation_224_224.hdf5.gz', 'test_224_224.hdf5.gz'])"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "3iucLd5AyfwA",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "e7fdb6c3-b01f-4aa8-dd53-a159147b7b2a"
},
"cell_type": "code",
"source": [
"cd KerasVision/"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"/content/KerasVision\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "jzhSsgOFzrlz",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"!pip install tqdm"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "mc6Mh6mSDWhO",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"#### Imports"
]
},
{
"metadata": {
"id": "w9oO1OQuC0y6",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "28bcc323-8e57-43db-e3a2-0132e652714a"
},
"cell_type": "code",
"source": [
"import os\n",
"from tqdm import tqdm\n",
"import matplotlib.pyplot as plt\n",
"from keras.preprocessing import image\n",
"\n",
"from keras.models import Sequential, Model\n",
"from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten, InputLayer\n",
"from keras.layers import BatchNormalization, Activation\n",
"from keras.preprocessing.image import ImageDataGenerator\n",
"from keras import optimizers\n",
"from keras.utils import to_categorical, plot_model\n",
"from keras.models import load_model\n",
"from keras.applications import VGG16, MobileNet\n",
"\n",
"from data.data_util import *\n",
"from data.load_data import from_splitted_hdf5\n",
"from train import *\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pickle\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"%matplotlib inline\n",
"\n",
"from data.augmentation.CustomImageDataGenerator import * \n",
"\n",
"import h5py"
],
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "m-vdSj5HBkSC",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"!pwd\n"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "X08LE52uC47C",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"#train_h5 = h5py.File('../data/train_224_224.hdf5', mode='r')\n",
"#dev_h5 = h5py.File('../data/validation_224_224.hdf5', mode='r')\n",
"\n",
"#train_set_x = train_h5['train_set_x'][:]\n",
"#train_set_y = train_h5['train_set_y'][:]\n",
"\n",
"#dev_set_x = dev_h5['dev_set_x'][:]\n",
"#dev_set_y = dev_h5['dev_set_y'][:]\n",
"\n",
"# Load the train/dev arrays and the class list; the fifth and sixth returned values are discarded.\n",
"(train_set_x, train_set_y,\n",
" dev_set_x, dev_set_y,\n",
" _, _, classes) = from_splitted_hdf5('../data')"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "m19WyqDTSkGY",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "ff9e2443-f861-4cb3-82fc-852f439ac0f7"
},
"cell_type": "code",
"source": [
"train_set_x.shape"
],
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(2978, 224, 224, 3)"
]
},
"metadata": {
"tags": []
},
"execution_count": 16
}
]
},
{
"metadata": {
"id": "Xu_OjHyDDdW4",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"conv_base = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Ay-uip_VFmtr",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"conv_base.summary()"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Nq_NmXaefCjq",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"from keras.applications.mobilenet import preprocess_input\n",
"\n",
"# The pretrained MobileNet weights expect inputs scaled to [-1, 1]; preprocess_input applies\n",
"# that scaling, whereas rescale=1./255 only maps pixels to [0, 1].\n",
"# NOTE(review): assumes the loaded images hold raw 0-255 pixel values -- confirm against the HDF5 data.\n",
"datagen = ImageDataGenerator(preprocessing_function=preprocess_input)\n",
"batch_size = 32"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "yWTM1Ux3TF3a",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"def extract_features(set_x, set_y, sample_count):\n",
"    \"\"\"Run images through `conv_base` once and collect the resulting feature maps.\n",
"\n",
"    Uses the notebook-level `datagen`, `batch_size` and `conv_base`.\n",
"\n",
"    Args:\n",
"        set_x: image array handed to `datagen.flow`.\n",
"        set_y: labels handed to `datagen.flow` together with `set_x`.\n",
"        sample_count: number of samples to extract.\n",
"\n",
"    Returns:\n",
"        (features, labels): `features` has shape `(sample_count,) + conv_base.output_shape[1:]`\n",
"        and `labels` has shape `(sample_count, 1)`; rows follow the order of `set_x`.\n",
"    \"\"\"\n",
"    # Take the feature-map shape from the model instead of hard-coding (7, 7, 1024).\n",
"    features = np.zeros(shape=(sample_count,) + tuple(conv_base.output_shape[1:]))\n",
"    labels = np.zeros(shape=(sample_count, 1))\n",
"    if sample_count == 0:\n",
"        return features, labels\n",
"\n",
"    # shuffle=False keeps the extracted rows aligned with the rows of set_x / set_y.\n",
"    generator = datagen.flow(set_x, set_y, batch_size=batch_size, shuffle=False)\n",
"\n",
"    filled = 0\n",
"    for inputs_batch, labels_batch in tqdm(generator):\n",
"        # A batch can be short (end of data) or overshoot sample_count; keep only what still fits.\n",
"        take = min(len(inputs_batch), sample_count - filled)\n",
"\n",
"        features_batch = conv_base.predict(inputs_batch)\n",
"        features[filled:filled + take] = features_batch[:take]\n",
"        labels[filled:filled + take] = np.reshape(labels_batch[:take], (take, 1))\n",
"        filled += take\n",
"\n",
"        if filled >= sample_count:\n",
"            # Note that since generators yield data indefinitely in a loop,\n",
"            # we must `break` once every requested sample has been stored.\n",
"            break\n",
"\n",
"    return features, labels"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "dNZkGYCOTMQ0",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"feature_train_set_x, feature_train_set_y = extract_features(train_set_x, train_set_y, len(train_set_x))\n",
"feature_dev_set_x, feature_dev_set_y = extract_features(dev_set_x, dev_set_y, len(dev_set_x))"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "0R31FaYxTO-R",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
},
"outputId": "c006b4bf-6d3e-434e-d35e-29e874efe57b"
},
"cell_type": "code",
"source": [
"feature_train_set_x.shape, train_set_x.shape, feature_dev_set_x.shape, dev_set_x.shape, feature_train_set_x.dtype"
],
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((2978, 7, 7, 1024),\n",
" (2978, 224, 224, 3),\n",
" (600, 7, 7, 1024),\n",
" (600, 224, 224, 3),\n",
" dtype('float64'))"
]
},
"metadata": {
"tags": []
},
"execution_count": 20
}
]
},
{
"metadata": {
"id": "NHV1iRrlTYQQ",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# reshape\n",
"#feature_train_set_x = np.reshape(feature_train_set_x, (len(feature_train_set_x), 7 * 7 * 1024))\n",
"#feature_dev_set_x = np.reshape(feature_dev_set_x, (len(feature_dev_set_x), 7 * 7 * 1024))\n",
"\n",
"# one-hot\n",
"# Encode only while the labels are still a single integer column, so re-running this cell\n",
"# does not one-hot-encode labels that are already one-hot.\n",
"if feature_train_set_y.shape[-1] == 1:\n",
"    # Fix the width from the training labels so train and dev always get the same number of columns.\n",
"    num_classes = int(feature_train_set_y.max()) + 1\n",
"    feature_train_set_y = to_categorical(feature_train_set_y, num_classes=num_classes)\n",
"    feature_dev_set_y = to_categorical(feature_dev_set_y, num_classes=num_classes)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "zyfp1l2yTnSe",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "e8514306-9a1c-43d2-ed73-e36e994c767d"
},
"cell_type": "code",
"source": [
"feature_train_set_x.shape, feature_train_set_y.shape, feature_dev_set_x.shape, feature_dev_set_y.shape, feature_train_set_x.dtype"
],
"execution_count": 22,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"((2978, 7, 7, 1024), (2978, 7), (600, 7, 7, 1024), (600, 7), dtype('float64'))"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
}
]
},
{
"metadata": {
"id": "NLDN8kzdTosT",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Classifier head: flatten the (7, 7, 1024) input, one hidden Dense block, 7-way softmax output.\n",
"model = Sequential([\n",
"    InputLayer(input_shape=(7, 7, 1024)),\n",
"    Flatten(),\n",
"    #Dense(1024, input_dim=7*7*1024),\n",
"    Dense(1024),\n",
"    BatchNormalization(),\n",
"    Activation('relu'),\n",
"    Dropout(0.5),\n",
"    Dense(7, activation='softmax'),\n",
"])"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "mSHbevzGTypd",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"model.compile(optimizer=optimizers.Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n",
"all_history = {}"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "RitWpPkYT0w0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 445
},
"outputId": "de1a3baa-981e-499b-ce1d-30b6a9d190ef"
},
"cell_type": "code",
"source": [
"batch_size = 32\n",
"# use data generator\n",
"train_datagen = ImageDataGenerator()\n",
"test_datagen = ImageDataGenerator()\n",
"\n",
"train_generator = train_datagen.flow(feature_train_set_x, feature_train_set_y, batch_size=batch_size)\n",
"# Validation must be repeatable: no shuffling, so each epoch scores the same samples in the same order.\n",
"validation_generator = test_datagen.flow(feature_dev_set_x, feature_dev_set_y, batch_size=batch_size,\n",
"                                         shuffle=False)\n",
"\n",
"# Round step counts up so the final partial batch is included; floor division silently\n",
"# dropped the leftover samples from every training and validation pass.\n",
"train_steps = int(np.ceil(len(feature_train_set_y) / float(batch_size)))\n",
"validation_steps = int(np.ceil(len(feature_dev_set_y) / float(batch_size)))\n",
"\n",
"history = model.fit_generator(train_generator, steps_per_epoch=train_steps, epochs=10,\n",
"                              validation_data=validation_generator, validation_steps=validation_steps)\n",
"\n",
"# default way\n",
"#history = model.fit(feature_train_set_x, feature_train_set_y, epochs=10, batch_size=batch_size,\n",
"# validation_data=(feature_dev_set_x, feature_dev_set_y)\n",
"# )"
],
"execution_count": 30,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py:1144: UserWarning: NumpyArrayIterator is set to use the data format convention \"channels_last\" (channels on axis 3), i.e. expected either 1, 3 or 4 channels on axis 3. However, it was passed an array with shape (2978, 7, 7, 1024) (1024 channels).\n",
" ' (' + str(self.x.shape[channels_axis]) + ' channels).')\n",
"/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py:1144: UserWarning: NumpyArrayIterator is set to use the data format convention \"channels_last\" (channels on axis 3), i.e. expected either 1, 3 or 4 channels on axis 3. However, it was passed an array with shape (600, 7, 7, 1024) (1024 channels).\n",
" ' (' + str(self.x.shape[channels_axis]) + ' channels).')\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"93/93 [==============================] - 9s 94ms/step - loss: 1.1749 - acc: 0.6085 - val_loss: 0.8274 - val_acc: 0.7049\n",
"Epoch 2/10\n",
"93/93 [==============================] - 8s 81ms/step - loss: 0.4332 - acc: 0.8599 - val_loss: 0.7917 - val_acc: 0.7153\n",
"Epoch 3/10\n",
"93/93 [==============================] - 7s 79ms/step - loss: 0.2851 - acc: 0.9141 - val_loss: 0.7107 - val_acc: 0.7431\n",
"Epoch 4/10\n",
"80/93 [========================>.....] - ETA: 0s - loss: 0.1671 - acc: 0.9582"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"93/93 [==============================] - 7s 79ms/step - loss: 0.1675 - acc: 0.9550 - val_loss: 0.6999 - val_acc: 0.7500\n",
"Epoch 5/10\n",
"93/93 [==============================] - 7s 79ms/step - loss: 0.1277 - acc: 0.9681 - val_loss: 0.6505 - val_acc: 0.7552\n",
"Epoch 6/10\n",
"93/93 [==============================] - 7s 79ms/step - loss: 0.0933 - acc: 0.9802 - val_loss: 0.6709 - val_acc: 0.7674\n",
"Epoch 7/10\n",
"93/93 [==============================] - 7s 79ms/step - loss: 0.0730 - acc: 0.9879 - val_loss: 0.6284 - val_acc: 0.7674\n",
"Epoch 8/10\n",
" 6/93 [>.............................] - ETA: 6s - loss: 0.0995 - acc: 0.9635"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"93/93 [==============================] - 7s 79ms/step - loss: 0.0578 - acc: 0.9916 - val_loss: 0.6399 - val_acc: 0.7639\n",
"Epoch 9/10\n",
"93/93 [==============================] - 7s 79ms/step - loss: 0.0488 - acc: 0.9956 - val_loss: 0.6805 - val_acc: 0.7622\n",
"Epoch 10/10\n",
"93/93 [==============================] - 7s 80ms/step - loss: 0.0537 - acc: 0.9850 - val_loss: 0.7092 - val_acc: 0.7552\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "DcNdpL6bT2nK",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment