muety/custom_model.ipynb

## custom_model.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import math\n",
    "import matplotlib.pyplot as plt\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout, Flatten\n",
    "from keras.layers import Conv2D, MaxPooling2D\n",
    "from keras.optimizers import Adam, SGD\n",
    "from keras.callbacks import ModelCheckpoint\n",
    "from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_DIR = '../data/preprocessed/'\n",
    "BATCH_SIZE = 32\n",
    "BATCH_SIZE_TEST = 16\n",
    "GRAYSCALE = True\n",
    "INPUT_DIM = (64, 64, 1 if GRAYSCALE else 3)\n",
    "AUGMENTATION_FACTOR = 3\n",
    "EPOCHS = 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_datagen = ImageDataGenerator(\n",
    "        rotation_range=10,\n",
    "        rescale=1./255,\n",
    "        shear_range=0.2,\n",
    "        zoom_range=0.2,\n",
    "        horizontal_flip=True)\n",
    "\n",
    "test_datagen = ImageDataGenerator(rescale=1./255)\n",
    "\n",
    "train_generator = train_datagen.flow_from_directory(\n",
    "        DATA_DIR + 'train',\n",
    "        target_size=INPUT_DIM[:2],\n",
    "        batch_size=BATCH_SIZE,\n",
    "        class_mode='categorical',\n",
    "        color_mode='grayscale' if GRAYSCALE else 'rgb')\n",
    "\n",
    "validation_generator = test_datagen.flow_from_directory(\n",
    "        DATA_DIR + 'validation',\n",
    "        target_size=INPUT_DIM[:2],\n",
    "        batch_size=BATCH_SIZE_TEST,\n",
    "        class_mode='categorical',\n",
    "        color_mode='grayscale' if GRAYSCALE else 'rgb')\n",
    "\n",
    "test_generator = test_datagen.flow_from_directory(\n",
    "        DATA_DIR + 'test',\n",
    "        target_size=INPUT_DIM[:2],\n",
    "        batch_size=BATCH_SIZE_TEST,\n",
    "        class_mode='categorical',\n",
    "        color_mode='grayscale' if GRAYSCALE else 'rgb')\n",
    "\n",
    "n_train = train_generator.n\n",
    "n_validation = validation_generator.n\n",
    "n_test = test_generator.n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_model():\n",
    "    model = Sequential()\n",
    "    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=INPUT_DIM))\n",
    "    model.add(MaxPooling2D(pool_size=(2, 2)))\n",
    "\n",
    "    model.add(Conv2D(32, (3, 3), activation='relu'))\n",
    "    model.add(MaxPooling2D(pool_size=(2, 2)))\n",
    "\n",
    "    model.add(Conv2D(32, (3, 3), activation='relu'))\n",
    "    model.add(MaxPooling2D(pool_size=(2, 2)))\n",
    "    \n",
    "    model.add(Flatten())\n",
    "    model.add(Dense(64, activation='relu'))\n",
    "    model.add(Dropout(0.5))\n",
    "    model.add(Dense(4, activation='softmax'))\n",
    "    \n",
    "    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = get_model()\n",
    "name_prefix = 'gray' if GRAYSCALE else 'rgb'\n",
    "callbacks = [ModelCheckpoint(name_prefix + '-{epoch:02d}-{val_acc:.2f}.hdf5', monitor='val_acc', verbose=1, save_best_only=False, mode='max')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit_generator(\n",
    "        train_generator,\n",
    "        steps_per_epoch= (n_train // BATCH_SIZE) * AUGMENTATION_FACTOR,\n",
    "        epochs=EPOCHS,\n",
    "        validation_data=validation_generator,\n",
    "        validation_steps=n_validation // BATCH_SIZE_TEST,\n",
    "        callbacks=callbacks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(history.history['acc'])\n",
    "plt.plot(history.history['val_acc'])\n",
    "plt.title('Model accuracy')\n",
    "plt.ylabel('Accuracy')\n",
    "plt.xlabel('Epoch')\n",
    "plt.legend(['Train', 'Test'], loc='upper left')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "modelfiles = [f for f in os.listdir('.') if f.endswith('.hdf5') and f.startswith('rgb' if not GRAYSCALE else 'gray')]\n",
    "for f in modelfiles:\n",
    "    model.load_weights(f)\n",
    "    result = model.evaluate_generator(\n",
    "        test_generator,\n",
    "        steps=n_test // BATCH_SIZE\n",
    "    )\n",
    "    print(f'{f}: {result[1]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(model.predict(img_to_array(examples_cs[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))\n",
    "print(model.predict(img_to_array(examples_econ[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))\n",
    "print(model.predict(img_to_array(examples_german[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))\n",
    "print(model.predict(img_to_array(examples_mechanical[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "test_generator.class_indices"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Some results\n",
    "* 64x64x3, 100 epochs, 3x augmentation, simple net: **0.571**\n",
    "* 64x64x1, 100 epochs, 3x augmentation, simple net: **0.500**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"import math\n",
	"import matplotlib.pyplot as plt\n",
	"from keras.models import Sequential\n",
	"from keras.layers import Dense, Dropout, Flatten\n",
	"from keras.layers import Conv2D, MaxPooling2D\n",
	"from keras.optimizers import Adam, SGD\n",
	"from keras.callbacks import ModelCheckpoint\n",
	"from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator\n",
	"from PIL import Image"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"DATA_DIR = '../data/preprocessed/'\n",
	"BATCH_SIZE = 32\n",
	"BATCH_SIZE_TEST = 16\n",
	"GRAYSCALE = True\n",
	"INPUT_DIM = (64, 64, 1 if GRAYSCALE else 3)\n",
	"AUGMENTATION_FACTOR = 3\n",
	"EPOCHS = 100"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"train_datagen = ImageDataGenerator(\n",
	" rotation_range=10,\n",
	" rescale=1./255,\n",
	" shear_range=0.2,\n",
	" zoom_range=0.2,\n",
	" horizontal_flip=True)\n",
	"\n",
	"test_datagen = ImageDataGenerator(rescale=1./255)\n",
	"\n",
	"train_generator = train_datagen.flow_from_directory(\n",
	" DATA_DIR + 'train',\n",
	" target_size=INPUT_DIM[:2],\n",
	" batch_size=BATCH_SIZE,\n",
	" class_mode='categorical',\n",
	" color_mode='grayscale' if GRAYSCALE else 'rgb')\n",
	"\n",
	"validation_generator = test_datagen.flow_from_directory(\n",
	" DATA_DIR + 'validation',\n",
	" target_size=INPUT_DIM[:2],\n",
	" batch_size=BATCH_SIZE_TEST,\n",
	" class_mode='categorical',\n",
	" color_mode='grayscale' if GRAYSCALE else 'rgb')\n",
	"\n",
	"test_generator = test_datagen.flow_from_directory(\n",
	" DATA_DIR + 'test',\n",
	" target_size=INPUT_DIM[:2],\n",
	" batch_size=BATCH_SIZE_TEST,\n",
	" class_mode='categorical',\n",
	" color_mode='grayscale' if GRAYSCALE else 'rgb')\n",
	"\n",
	"n_train = train_generator.n\n",
	"n_validation = validation_generator.n\n",
	"n_test = test_generator.n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_model():\n",
	" model = Sequential()\n",
	" model.add(Conv2D(32, (3, 3), activation='relu', input_shape=INPUT_DIM))\n",
	" model.add(MaxPooling2D(pool_size=(2, 2)))\n",
	"\n",
	" model.add(Conv2D(32, (3, 3), activation='relu'))\n",
	" model.add(MaxPooling2D(pool_size=(2, 2)))\n",
	"\n",
	" model.add(Conv2D(32, (3, 3), activation='relu'))\n",
	" model.add(MaxPooling2D(pool_size=(2, 2)))\n",
	" \n",
	" model.add(Flatten())\n",
	" model.add(Dense(64, activation='relu'))\n",
	" model.add(Dropout(0.5))\n",
	" model.add(Dense(4, activation='softmax'))\n",
	" \n",
	" model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])\n",
	" return model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"model = get_model()\n",
	"name_prefix = 'gray' if GRAYSCALE else 'rgb'\n",
	"callbacks = [ModelCheckpoint(name_prefix + '-{epoch:02d}-{val_acc:.2f}.hdf5', monitor='val_acc', verbose=1, save_best_only=False, mode='max')]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"history = model.fit_generator(\n",
	" train_generator,\n",
	" steps_per_epoch= (n_train // BATCH_SIZE) * AUGMENTATION_FACTOR,\n",
	" epochs=EPOCHS,\n",
	" validation_data=validation_generator,\n",
	" validation_steps=n_validation // BATCH_SIZE_TEST,\n",
	" callbacks=callbacks)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"plt.plot(history.history['acc'])\n",
	"plt.plot(history.history['val_acc'])\n",
	"plt.title('Model accuracy')\n",
	"plt.ylabel('Accuracy')\n",
	"plt.xlabel('Epoch')\n",
	"plt.legend(['Train', 'Test'], loc='upper left')\n",
	"plt.show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"modelfiles = [f for f in os.listdir('.') if f.endswith('.hdf5') and f.startswith('rgb' if not GRAYSCALE else 'gray')]\n",
	"for f in modelfiles:\n",
	" model.load_weights(f)\n",
	" result = model.evaluate_generator(\n",
	" test_generator,\n",
	" steps=n_test // BATCH_SIZE\n",
	" )\n",
	" print(f'{f}: {result[1]}')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"print(model.predict(img_to_array(examples_cs[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))\n",
	"print(model.predict(img_to_array(examples_econ[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))\n",
	"print(model.predict(img_to_array(examples_german[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))\n",
	"print(model.predict(img_to_array(examples_mechanical[0].convert('L')).reshape(1, *INPUT_DIM), batch_size=1))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"test_generator.class_indices"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Some results\n",
	"* 64x64x3, 100 epochs, 3x augmentation, simple net: 0.571\n",
	"* 64x64x1, 100 epochs, 3x augmentation, simple net: 0.500"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}