Skip to content

Instantly share code, notes, and snippets.

@muety
Created July 27, 2019 15:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save muety/404befcfb2eef4b59398f3c8590ce692 to your computer and use it in GitHub Desktop.
Save muety/404befcfb2eef4b59398f3c8590ce692 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import os\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from itertools import product\n",
"from keras.engine import Model\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Flatten\n",
"from keras.layers import Conv2D, MaxPooling2D\n",
"from keras.callbacks import ModelCheckpoint, Callback\n",
"from keras.preprocessing.image import load_img, img_to_array, array_to_img, ImageDataGenerator\n",
"from keras_vggface.vggface import VGGFace\n",
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Constants"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DATA_DIR = '../data/preprocessed/'\n",
"BATCH_SIZE = 32\n",
"GRAYSCALE = False\n",
"AUGMENTATION_FACTOR = 3\n",
"EPOCHS = 100\n",
"RANDOM_SEED = 123"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load VGGFace finetuned model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"INPUT_DIM = (128, 128, 1 if GRAYSCALE else 3)\n",
"\n",
"def get_model():\n",
" vgg_model = VGGFace(include_top=False, input_shape=INPUT_DIM, pooling='max')\n",
"\n",
" top_model = Sequential(name='top')\n",
" top_model.add(Dense(128, activation='relu', input_shape=vgg_model.output_shape[1:]))\n",
" top_model.add(Dropout(0.5))\n",
" top_model.add(Dense(4, activation='softmax'))\n",
" \n",
" for layer in vgg_model.layers[:-3]:\n",
" layer.trainable = False\n",
" \n",
" model = Sequential()\n",
" model.add(vgg_model)\n",
" model.add(top_model)\n",
" \n",
" model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])\n",
" \n",
" return model\n",
"\n",
"model = get_model()\n",
"\n",
"model.load_weights('final-22-0.546.hdf5')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load custom model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"INPUT_DIM = (64, 64, 1 if GRAYSCALE else 3)\n",
"\n",
"def get_model():\n",
" model = Sequential()\n",
" model.add(Conv2D(32, (3, 3), activation='relu', input_shape=INPUT_DIM))\n",
" model.add(MaxPooling2D(pool_size=(2, 2)))\n",
"\n",
" model.add(Conv2D(32, (3, 3), activation='relu'))\n",
" model.add(MaxPooling2D(pool_size=(2, 2)))\n",
"\n",
" model.add(Conv2D(32, (3, 3), activation='relu'))\n",
" model.add(MaxPooling2D(pool_size=(2, 2)))\n",
" \n",
" model.add(Flatten())\n",
" model.add(Dense(64, activation='relu'))\n",
" model.add(Dropout(0.5))\n",
" model.add(Dense(4, activation='softmax'))\n",
" \n",
" model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])\n",
" return model\n",
"\n",
"model = get_model()\n",
"\n",
"model.load_weights('rgb-29-0.57.hdf5')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data generator"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_datagen = ImageDataGenerator(\n",
" rotation_range=10,\n",
" rescale=1./255,\n",
" shear_range=0.2,\n",
" zoom_range=0.2,\n",
" horizontal_flip=True)\n",
"\n",
"datagen = ImageDataGenerator(rescale=1./255)\n",
"\n",
"generator_base_params = {\n",
" 'target_size': INPUT_DIM[:2],\n",
" 'class_mode': 'categorical',\n",
" 'color_mode': 'grayscale' if GRAYSCALE else 'rgb',\n",
" 'batch_size': BATCH_SIZE,\n",
" 'seed': RANDOM_SEED\n",
"}\n",
"\n",
"train_generator = train_datagen.flow_from_directory(DATA_DIR + 'train', shuffle=True, **generator_base_params)\n",
"test_generator = datagen.flow_from_directory(DATA_DIR + 'test', shuffle=True, **generator_base_params)\n",
"\n",
"n_train = train_generator.n\n",
"n_test = test_generator.n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Investigating the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_generator.shuffle = False\n",
"test_generator.batch_size = 1\n",
"test_generator.reset()\n",
"\n",
"tuples = [test_generator.next() for i in range(n_test)]\n",
"x_test, y_real = np.array([x[0][0] for x in tuples]), np.array([x[1][0] for x in tuples])\n",
"\n",
"y_pred = model.predict(x_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Class distributions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"classes = ['cs', 'econ', 'german', 'mechanical']\n",
"dist_real = np.bincount(y_real.argmax(axis=1)) / y_real.shape[0]\n",
"dist_pred = np.bincount(y_pred.argmax(axis=1)) / y_pred.shape[0]\n",
"print(classes)\n",
"print(dist_real)\n",
"print(dist_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Examples of false classifications"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def print_samples(class_index):\n",
" top_n = 5\n",
" c = class_index\n",
" \n",
" fig = plt.figure(figsize=(8, 8))\n",
" \n",
" idx1 = np.where(y_real[:,c] == 1)\n",
" idx2 = np.where(y_real[:,c] == 0)\n",
" top_false_neg = np.abs((y_real[idx1, c] - y_pred[idx1, c])[0]).argsort()[-top_n:][::-1]\n",
" top_false_pos = np.abs((y_real[idx2, c] - y_pred[idx2, c])[0]).argsort()[-top_n:][::-1]\n",
" top_true_pos = np.abs((y_real[idx1, c] - y_pred[idx1, c])[0]).argsort()[:top_n]\n",
"\n",
" for i in range(top_n):\n",
" img = array_to_img(x_test[idx1[0][top_false_neg[i]]])\n",
" fig.add_subplot(3, top_n, i + 1)\n",
" plt.imshow(img)\n",
" \n",
" for i in range(top_n):\n",
" img = array_to_img(x_test[idx2[0][top_false_pos[i]]])\n",
" fig.add_subplot(3, top_n, i + 1 + top_n)\n",
" plt.imshow(img)\n",
" \n",
" for i in range(top_n):\n",
" img = array_to_img(x_test[idx1[0][top_true_pos[i]]])\n",
" fig.add_subplot(3, top_n, i + 1 + 2 * top_n)\n",
" plt.imshow(img)\n",
" \n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *cs*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks computer scientists do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks computer scientists DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print_samples(0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *econ*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks economists do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks economists DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print_samples(1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *german*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks German linguists do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks German linguists DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print_samples(2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *mechanical*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks mechanical engineers do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks mechanical engineers DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print_samples(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Confusion"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"confmat = confusion_matrix(y_real.argmax(axis=1), y_pred.argmax(axis=1)) / n_test\n",
"confmat * 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.matshow(confmat)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Backup Code"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Backup code\n",
"\n",
"confusion = np.zeros((4, 4)) # NOT a confusion matrix\n",
"\n",
"for t in product(range(4), range(4)):\n",
" mask1 = y_pred.argmax(axis=1) == t[0]\n",
" mask2 = y_real.argmax(axis=1) == t[1]\n",
" confusion[t[0], t[1]] = np.array((mask1 == mask2)).sum() / n_test\n",
" \n",
"plt.matshow(confusion)\n",
"\n",
"[(classes[x // 4], classes[x % 4], confusion.ravel()[x]) for x in confusion.argsort(axis=None)[::-1] if x // 4 != x % 4]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment