Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import os\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from itertools import product\n",
"from keras.engine import Model\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Flatten\n",
"from keras.layers import Conv2D, MaxPooling2D\n",
"from keras.callbacks import ModelCheckpoint, Callback\n",
"from keras.preprocessing.image import load_img, img_to_array, array_to_img, ImageDataGenerator\n",
"from keras_vggface.vggface import VGGFace\n",
"from sklearn.metrics import confusion_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Constants"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Data location and loading\n",
"DATA_DIR = '../data/preprocessed/'  # parent of the 'train' and 'test' image folders\n",
"GRAYSCALE = False  # True -> 1 input channel and color_mode 'grayscale', False -> 3 channels and 'rgb'\n",
"BATCH_SIZE = 32\n",
"RANDOM_SEED = 123  # passed to the directory iterators as their seed\n",
"\n",
"# Not referenced by any cell visible in this notebook\n",
"AUGMENTATION_FACTOR = 3\n",
"EPOCHS = 100"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load VGGFace finetuned model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE: this cell and the 'Load custom model' cell both assign INPUT_DIM, get_model and model;\n",
"# whichever of the two ran last determines what the rest of the notebook uses.\n",
"INPUT_DIM = (128, 128, 1 if GRAYSCALE else 3)\n",
"\n",
"def get_model():\n",
"    \"\"\"Build and compile the VGGFace-based classifier.\n",
"\n",
"    A VGGFace base (include_top=False, pooling='max') is followed by a small dense\n",
"    head that ends in a 4-way softmax. All base layers except the last three are\n",
"    marked non-trainable before the model is compiled.\n",
"\n",
"    Returns:\n",
"        The compiled Sequential model (categorical cross-entropy loss, Adam\n",
"        optimizer, accuracy metric).\n",
"    \"\"\"\n",
"    vgg_model = VGGFace(include_top=False, input_shape=INPUT_DIM, pooling='max')\n",
"\n",
"    top_model = Sequential(name='top')\n",
"    top_model.add(Dense(128, activation='relu', input_shape=vgg_model.output_shape[1:]))\n",
"    top_model.add(Dropout(0.5))\n",
"    top_model.add(Dense(4, activation='softmax'))\n",
"\n",
"    # Freeze everything but the last three layers of the base network.\n",
"    for layer in vgg_model.layers[:-3]:\n",
"        layer.trainable = False\n",
"\n",
"    model = Sequential()\n",
"    model.add(vgg_model)\n",
"    model.add(top_model)\n",
"\n",
"    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])\n",
"\n",
"    return model\n",
"\n",
"model = get_model()\n",
"\n",
"model.load_weights('final-22-0.546.hdf5')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load custom model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# This cell rebinds INPUT_DIM, get_model and model (also assigned by the VGGFace cell).\n",
"INPUT_DIM = (64, 64, 1 if GRAYSCALE else 3)\n",
"\n",
"def get_model():\n",
"    \"\"\"Build and compile the small custom CNN.\n",
"\n",
"    Three Conv2D(32, 3x3, relu) + MaxPooling2D(2x2) stages, then Flatten,\n",
"    Dense(64, relu), Dropout(0.5) and a 4-way softmax output.\n",
"\n",
"    Returns:\n",
"        The compiled Sequential model (categorical cross-entropy loss, Adam\n",
"        optimizer, accuracy metric).\n",
"    \"\"\"\n",
"    stack = []\n",
"    for stage in range(3):\n",
"        # Only the very first layer declares the input shape.\n",
"        first_layer_args = {'input_shape': INPUT_DIM} if stage == 0 else {}\n",
"        stack.append(Conv2D(32, (3, 3), activation='relu', **first_layer_args))\n",
"        stack.append(MaxPooling2D(pool_size=(2, 2)))\n",
"\n",
"    stack.extend([\n",
"        Flatten(),\n",
"        Dense(64, activation='relu'),\n",
"        Dropout(0.5),\n",
"        Dense(4, activation='softmax'),\n",
"    ])\n",
"\n",
"    cnn = Sequential(stack)\n",
"    cnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])\n",
"    return cnn\n",
"\n",
"model = get_model()\n",
"\n",
"model.load_weights('rgb-29-0.57.hdf5')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data generator"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Training images are rescaled and randomly augmented; the plain generator only rescales.\n",
"train_datagen = ImageDataGenerator(\n",
"    rescale=1./255,\n",
"    rotation_range=10,\n",
"    shear_range=0.2,\n",
"    zoom_range=0.2,\n",
"    horizontal_flip=True,\n",
")\n",
"datagen = ImageDataGenerator(rescale=1./255)\n",
"\n",
"# Keyword arguments shared by both directory iterators.\n",
"generator_base_params = dict(\n",
"    target_size=INPUT_DIM[:2],\n",
"    class_mode='categorical',\n",
"    color_mode='grayscale' if GRAYSCALE else 'rgb',\n",
"    batch_size=BATCH_SIZE,\n",
"    seed=RANDOM_SEED,\n",
")\n",
"\n",
"train_generator = train_datagen.flow_from_directory(\n",
"    DATA_DIR + 'train', shuffle=True, **generator_base_params)\n",
"test_generator = datagen.flow_from_directory(\n",
"    DATA_DIR + 'test', shuffle=True, **generator_base_params)\n",
"\n",
"n_train, n_test = train_generator.n, test_generator.n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Investigating the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read the whole test set once, in directory order, through a dedicated non-shuffling\n",
"# iterator instead of flipping shuffle / batch_size on the shared test_generator in place\n",
"# (that mutation silently changed the generator for every later cell and made re-runs\n",
"# depend on execution order).\n",
"eval_generator = datagen.flow_from_directory(\n",
"    DATA_DIR + 'test', shuffle=False, **generator_base_params)\n",
"\n",
"# One pass = ceil(n / batch_size) batches; the last batch may be smaller.\n",
"n_batches = math.ceil(eval_generator.n / eval_generator.batch_size)\n",
"batches = [next(eval_generator) for _ in range(n_batches)]\n",
"x_test = np.concatenate([images for images, _ in batches])\n",
"y_real = np.concatenate([labels for _, labels in batches])\n",
"assert len(x_test) == len(y_real) == eval_generator.n, 'test set was not read exactly once'\n",
"\n",
"y_pred = model.predict(x_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Class distributions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"classes = ['cs', 'econ', 'german', 'mechanical']\n",
"\n",
"# minlength keeps one entry per class even when the highest-indexed classes never occur\n",
"# (e.g. the model never predicts 'mechanical'), so both vectors always line up with `classes`.\n",
"dist_real = np.bincount(y_real.argmax(axis=1), minlength=len(classes)) / y_real.shape[0]\n",
"dist_pred = np.bincount(y_pred.argmax(axis=1), minlength=len(classes)) / y_pred.shape[0]\n",
"print(classes)\n",
"print(dist_real)\n",
"print(dist_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Examples of false classifications"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def print_samples(class_index, top_n=5):\n",
"    \"\"\"Plot the most extreme test images for one class in a 3-row grid.\n",
"\n",
"    Reads the notebook globals x_test, y_real (one-hot labels) and y_pred (model\n",
"    outputs). Images are ranked by |y_real - y_pred| in column `class_index`:\n",
"\n",
"    * row 1: members of the class with the largest error (false negatives),\n",
"    * row 2: non-members with the largest error (false positives),\n",
"    * row 3: members of the class with the smallest error (true positives).\n",
"\n",
"    Args:\n",
"        class_index: column of y_real / y_pred to inspect.\n",
"        top_n: maximum number of images per row; a row is shorter when fewer\n",
"            candidates exist (previously this raised an IndexError).\n",
"    \"\"\"\n",
"    c = class_index\n",
"    member_idx = np.flatnonzero(y_real[:, c] == 1)\n",
"    other_idx = np.flatnonzero(y_real[:, c] == 0)\n",
"\n",
"    member_err = np.abs(y_real[member_idx, c] - y_pred[member_idx, c])\n",
"    other_err = np.abs(y_real[other_idx, c] - y_pred[other_idx, c])\n",
"\n",
"    # argsort is ascending: reversed -> largest error first, as-is -> smallest error first.\n",
"    rows = [\n",
"        ('False negatives', member_idx[member_err.argsort()[::-1][:top_n]]),\n",
"        ('False positives', other_idx[other_err.argsort()[::-1][:top_n]]),\n",
"        ('True positives', member_idx[member_err.argsort()[:top_n]]),\n",
"    ]\n",
"\n",
"    fig = plt.figure(figsize=(8, 8))\n",
"    for row, (label, sample_idx) in enumerate(rows):\n",
"        for col, sample in enumerate(sample_idx):\n",
"            ax = fig.add_subplot(len(rows), top_n, row * top_n + col + 1)\n",
"            ax.imshow(array_to_img(x_test[sample]))\n",
"            if col == 0:\n",
"                # Label each row once so the figure can be read without the markdown legend.\n",
"                ax.set_title(label, loc='left', fontsize=10)\n",
"\n",
"    plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *cs*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks computer scientists do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks computer scientists DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print_samples(class_index=0)  # class 'cs'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *econ*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks economists do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks economists DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print_samples(class_index=1)  # class 'econ'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *german*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks German linguists do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks German linguists DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print_samples(class_index=2)  # class 'german'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Class: *mechanical*\n",
"**1st row:** Top n false negatives (What the model mistakenly thinks mechanical engineers do NOT look like)\n",
"\n",
"**2nd row:** Top n false positives (What the model mistakenly thinks mechanical engineers DO look like)\n",
"\n",
"**3rd row:** Top n true positives"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print_samples(class_index=3)  # class 'mechanical'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Confusion matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pin the label set so the matrix keeps one row/column per class even if a class is\n",
"# absent from both the true and the predicted labels.\n",
"# Rows are true classes, columns are predicted classes; entries are fractions of n_test.\n",
"confmat = confusion_matrix(\n",
"    y_real.argmax(axis=1),\n",
"    y_pred.argmax(axis=1),\n",
"    labels=np.arange(y_real.shape[1]),\n",
") / n_test\n",
"confmat * 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots()\n",
"image = ax.matshow(confmat)\n",
"# sklearn's confusion_matrix puts true classes on rows and predictions on columns.\n",
"ax.set_xlabel('Predicted class')\n",
"ax.set_ylabel('True class')\n",
"fig.colorbar(image)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Backup Code"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Backup code\n",
"\n",
"pred_labels = y_pred.argmax(axis=1)\n",
"real_labels = y_real.argmax(axis=1)\n",
"\n",
"# For every (predicted, real) class pair: share of test samples on which\n",
"# 'predicted as p' and 'labelled as r' are either both true or both false.\n",
"confusion = np.zeros((4, 4)) # NOT a confusion matrix\n",
"for p, r in product(range(4), range(4)):\n",
"    agree = (pred_labels == p) == (real_labels == r)\n",
"    confusion[p, r] = agree.sum() / n_test\n",
"\n",
"plt.matshow(confusion)\n",
"\n",
"# Off-diagonal pairs, highest value first.\n",
"ranked = confusion.argsort(axis=None)[::-1]\n",
"[(classes[i], classes[j], confusion[i, j]) for i, j in (divmod(k, 4) for k in ranked) if i != j]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.