Created
December 21, 2016 17:43
-
-
Save grigorisg9gr/b344260171538bc8711834c8de82e3a7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from os.path import isdir\n", | |
"import numpy as np\n", | |
"\n", | |
"# import_images / import_builtin_asset load the samples used below.\n", | |
"try:\n", | |
"    from menpo.io import import_images, import_builtin_asset\n", | |
"except ImportError:\n", | |
"    # the trailing space keeps the URL apart from the rest of the message.\n", | |
"    m2 = ('The menpo package from http://www.menpo.org/ '\n", | |
"          'is required, please install it.')\n", | |
"    print(m2)\n", | |
"\n", | |
"\n", | |
"# resize_all_images brings all the images to a common size (used below).\n", | |
"try:\n", | |
"    from research_pyutils import resize_all_images\n", | |
"except ImportError:\n", | |
"    m1 = ('The library of pyutils is not installed, so '\n", | |
"          'please download it from: \\n'\n", | |
"          'https://github.com/grigorisg9gr/pyutils')\n", | |
"    print(m1)\n", | |
"\n", | |
"# inline figures for the notebook; optional when the magic is unavailable.\n", | |
"try:\n", | |
"    %matplotlib inline\n", | |
"except NameError:\n", | |
"    print('Probably a terminal, it\\'s not required.')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Get the input samples, pre-process them" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# path for the positive samples (human faces).\n", | |
"# Images from the '300 faces in-the-wild' challenge.\n", | |
"p_faces = '/vol/atlas/databases/300w/'\n", | |
"# path for the negative samples.\n", | |
"# The data are from the PASCAL dataset, those that do not include\n", | |
"# any type of human/face annotation.\n", | |
"p_neg = '/vol/atlas/homes/pts08/non_person_images/'\n", | |
"assert isdir(p_faces) and isdir(p_neg)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# import the 'positive' samples.\n", | |
"ims = import_images(p_faces, verbose=True, max_images=200)\n", | |
"# pre-processing (cropping)\n", | |
"pos_ims = [im.crop_to_landmarks_proportion(0.2) for im in ims]\n", | |
"# get a sample shape to extract a patch (a rectangle part \n", | |
"# of the image).\n", | |
"shape = pos_ims[0].shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# import the 'negative' samples.\n", | |
"neg_ims = import_images(p_neg, max_images=200, verbose=True)\n", | |
"# get a patch from each image: in the same shape as the \n", | |
"# positive samples.\n", | |
"neg_ims = [im.crop((0, 0), shape) for im in neg_ims]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"nr_positives = len(pos_ims)\n", | |
"# append the two lists, the first few will be the positives ones.\n", | |
"images = pos_ims + neg_ims\n", | |
"# resize all the images into the same size.\n", | |
"images = resize_all_images(images)\n", | |
"shape = images[0].shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# one last step: get only the pixel values.\n", | |
"pixels = []\n", | |
"for im in images:\n", | |
"    # 3-channel images are converted to greyscale first.\n", | |
"    if im.n_channels == 3:\n", | |
"        im = im.as_greyscale()\n", | |
"    # appended for every image, so that pixels stays aligned with labels.\n", | |
"    pixels.append(im.pixels_with_channels_at_back())\n", | |
"\n", | |
"# convert the pixels into a numpy array and flatten every image into one\n", | |
"# row, since most scikit learn models expect the input as a 2d matrix\n", | |
"# (one row per sample).\n", | |
"pixels = np.reshape(np.array(pixels, dtype=np.float32), (len(pixels), -1))\n", | |
"# Create the labels: the first nr_positives samples are the positives (1),\n", | |
"# the rest are the negatives (0).\n", | |
"labels = np.zeros((len(images)), dtype=np.float32)\n", | |
"labels[:nr_positives] = 1" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Model learning" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# shuffle the samples row-wise; the same permutation is applied to both\n", | |
"# pixels and labels so that they stay aligned. The fixed seed makes the\n", | |
"# shuffle repeatable when the notebook is re-run from a fresh kernel.\n", | |
"p = np.random.RandomState(0).permutation(pixels.shape[0])\n", | |
"pixels = pixels[p]\n", | |
"labels = labels[p]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Using the scikit learn, the model selected is a simple Linear SVM, \n", | |
"# however you can choose any type of model, visit their documentation\n", | |
"# for several examples from different application areas.\n", | |
"from sklearn import svm\n", | |
"print('\\nTraining SVM')\n", | |
"cl1 = svm.LinearSVC(verbose=1).fit(pixels, labels)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Validate your model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"val_im = import_builtin_asset.takeo_ppm().as_greyscale()\n", | |
"# ensure that it has the appropriate dimensionality for comparing with \n", | |
"# the pre-trained classifier.\n", | |
"val_im = val_im.crop_to_landmarks_proportion(0.4).resize(shape)\n", | |
"val_px = val_im.pixels_with_channels_at_back(out_dtype=np.float32)\n", | |
"val_px = val_px.reshape((1, -1))\n", | |
"decision = cl1.predict(val_px)\n", | |
"m1 = 'The classifier decided that it is{} a face.'\n", | |
"msg = '' if decision[0] == 1 else ' not'\n", | |
"print(m1.format(msg))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# view your validated sample.\n", | |
"try:\n", | |
"    val_im.view()\n", | |
"except Exception:\n", | |
"    # e.g. in a terminal the image probably cannot be shown. Viewing is\n", | |
"    # optional, so the failure is ignored; Exception (instead of a bare\n", | |
"    # except) still lets KeyboardInterrupt/SystemExit propagate.\n", | |
"    pass" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Improvements" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Several improvements to the simple approach above would be:\n", | |
"****************************\n", | |
"****************************\n", | |
"\n", | |
"1) Extract features instead of feeding the pixel intensities right away.\n", | |
"****************************\n", | |
"\n", | |
"2) Extract patches around landmark points (just visualise one of the\n", | |
"\n", | |
" positive images with ims[0].view_landmarks() to understand). The\n", | |
" \n", | |
" motivation is that the patches do not include so much noise as the\n", | |
" \n", | |
" whole crude bounding box around the face.\n", | |
"****************************\n", | |
" \n", | |
"3) Try different learning methods, scikit includes plenty of them.\n", | |
"****************************\n", | |
"\n", | |
"4) Extract negative patches at different scales, and possibly positive\n", | |
"\n", | |
" ones as well. There is no guarantee that the faces will appear in the same\n", | |
" \n", | |
" size/shape as in the training set.\n", | |
" \n", | |
"****************************\n", | |
"\n", | |
"5) Create a validation set, see where this simple approach fails and\n", | |
"\n", | |
" try to improve on those cases.\n", | |
"****************************\n", | |
" \n", | |
"6) Perturb the bounding boxes/patches around the face to allow some slack.\n", | |
"\n", | |
" Even if it does not return the exact bounding box, it might still be a\n", | |
" \n", | |
" face, so allow the algorithm to consider such perturbations as positive.\n", | |
"****************************\n", | |
"\n", | |
"7) Get a random patch from each negative image, not a fixed one. You might\n", | |
"\n", | |
" even consider several patches, e.g. 50, per image." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment