grigorisg9gr/[eestec]Machine_learning_seminar_12_2016_simple_isface_classifier.ipynb

## [eestec]Machine_learning_seminar_12_2016_simple_isface_classifier.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from os.path import isdir\n",
    "import numpy as np\n",
    "\n",
    "# menpo provides the image importers used in the rest of the notebook.\n",
    "try:\n",
    "    from menpo.io import import_images, import_builtin_asset\n",
    "except ImportError:\n",
    "    # the trailing space after the URL keeps it apart from the rest of\n",
    "    # the sentence once the two literals are concatenated.\n",
    "    m2 = ('The menpo package from http://www.menpo.org/ '\n",
    "          'is required, please install it.')\n",
    "    print(m2)\n",
    "\n",
    "\n",
    "# resize_all_images is used later to bring all samples to the same size.\n",
    "try:\n",
    "    from research_pyutils import resize_all_images\n",
    "except ImportError:\n",
    "    m1 = ('The library of pyutils is not installed, so '\n",
    "          'please download it from: \\n'\n",
    "          'https://github.com/grigorisg9gr/pyutils')\n",
    "    print(m1)\n",
    "\n",
    "# inline plotting is optional; presumably the NameError guard covers the\n",
    "# case where the code runs outside IPython (e.g. as an exported script).\n",
    "try:\n",
    "    %matplotlib inline\n",
    "except NameError:\n",
    "    print('Probably a terminal, it\\'s not required.')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Get the input samples, pre-process them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# path for the positive samples (human faces).\n",
    "# Images from the '300 faces in-the-wild' challenge.\n",
    "p_faces = '/vol/atlas/databases/300w/'\n",
    "# path for the negative samples.\n",
    "# The data are from the PASCAL dataset, those that do not include\n",
    "# any type of human/face annotation.\n",
    "p_neg = '/vol/atlas/homes/pts08/non_person_images/'\n",
    "# check each folder on its own, so that a failure names the missing one.\n",
    "assert isdir(p_faces), 'Positive samples folder not found: {}'.format(p_faces)\n",
    "assert isdir(p_neg), 'Negative samples folder not found: {}'.format(p_neg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# load the 'positive' samples (faces); max_images caps them at 200.\n",
    "ims = import_images(p_faces, max_images=200, verbose=True)\n",
    "# pre-processing: crop every image around its landmarks.\n",
    "pos_ims = [face.crop_to_landmarks_proportion(0.2) for face in ims]\n",
    "# the shape of the first cropped face is kept as the size of the\n",
    "# patch (a rectangular part of the image) to extract later.\n",
    "shape = pos_ims[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# import the 'negative' samples.\n",
    "neg_ims = import_images(p_neg, max_images=200, verbose=True)\n",
    "# get a patch from the top-left corner of each image, in the same shape\n",
    "# as the positive samples. Per menpo's documentation,\n",
    "# constrain_to_boundary=True snaps the patch to the image borders instead\n",
    "# of raising an error when an image is smaller than the requested patch;\n",
    "# all the images are resized to a common size in a later step anyway.\n",
    "neg_ims = [im.crop((0, 0), shape, constrain_to_boundary=True)\n",
    "           for im in neg_ims]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# join the two lists; the positives come first, hence their count is\n",
    "# enough to tell the two classes apart afterwards.\n",
    "images = pos_ims + neg_ims\n",
    "nr_positives = len(pos_ims)\n",
    "# bring all the images into the same size.\n",
    "images = resize_all_images(images)\n",
    "# update the reference shape with the one after the resizing.\n",
    "shape = images[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# one last step: keep only the pixel values of each image.\n",
    "pixels = []\n",
    "for im in images:\n",
    "    # images with 3 channels are converted to greyscale first.\n",
    "    grey = im.as_greyscale() if im.n_channels == 3 else im\n",
    "    pixels.append(grey.pixels_with_channels_at_back())\n",
    "\n",
    "# stack the pixels into a numpy array with one flattened image per row;\n",
    "# most of the scikit learn models expect 2d numpy arrays.\n",
    "n_samples = len(pixels)\n",
    "pixels = np.array(pixels, dtype=np.float32).reshape((n_samples, -1))\n",
    "# Create the labels: 1 for the positives (the first rows), 0 for the rest.\n",
    "labels = np.zeros(len(images), dtype=np.float32)\n",
    "labels[:nr_positives] = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# shuffle the arrays (per rows); the same permutation is applied to the\n",
    "# samples and to the labels, so every row keeps its own label.\n",
    "# A fixed seed makes the shuffling reproducible from run to run.\n",
    "rng = np.random.RandomState(42)\n",
    "p = rng.permutation(pixels.shape[0])\n",
    "pixels = pixels[p]\n",
    "labels = labels[p]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# The model picked from scikit learn is a simple Linear SVM; any other\n",
    "# type of model can be chosen instead, their documentation includes\n",
    "# several examples from different application areas.\n",
    "from sklearn import svm\n",
    "print('\\nTraining SVM')\n",
    "svm_model = svm.LinearSVC(verbose=1)\n",
    "# fit on the flattened pixels and keep the trained classifier.\n",
    "cl1 = svm_model.fit(pixels, labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Validate your model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# load the built-in 'takeo' asset as a greyscale image.\n",
    "val_im = import_builtin_asset.takeo_ppm().as_greyscale()\n",
    "# crop around the landmarks and resize to the shape of the training\n",
    "# samples, so that the dimensionality matches the trained classifier.\n",
    "val_im = val_im.crop_to_landmarks_proportion(0.4)\n",
    "val_im = val_im.resize(shape)\n",
    "# flatten the pixels into a single row, i.e. one sample.\n",
    "val_px = (val_im.pixels_with_channels_at_back(out_dtype=np.float32)\n",
    "          .reshape((1, -1)))\n",
    "decision = cl1.predict(val_px)\n",
    "# the label 1 was assigned to the faces during training.\n",
    "if decision[0] == 1:\n",
    "    msg = ''\n",
    "else:\n",
    "    msg = ' not'\n",
    "m1 = 'The classifier decided that it is{} a face.'\n",
    "print(m1.format(msg))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# view your validated sample.\n",
    "try:\n",
    "    val_im.view()\n",
    "except Exception as err:\n",
    "    # e.g. in a case of a terminal, this won't probably show; report the\n",
    "    # reason instead of hiding it. Catching Exception (not a bare except)\n",
    "    # lets KeyboardInterrupt/SystemExit propagate.\n",
    "    print('Could not display the image: {}'.format(err))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Improvements"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Several improvements to the simple approach above would be:\n",
    "****************************\n",
    "****************************\n",
    "\n",
    "1) Extract features instead of feeding the pixel intensities right away.\n",
    "****************************\n",
    "\n",
    "2) Extract patches around landmark points (just visualise one of the\n",
    "\n",
    "   positive images with ims[0].view_landmarks() to understand). The\n",
    "   \n",
    "   motivation is that the patches do not include as much noise as the\n",
    "   \n",
    "   whole crude bounding box around the face.\n",
    "****************************\n",
    "   \n",
    "3) Try different learning methods, scikit-learn includes plenty of them.\n",
    "****************************\n",
    "\n",
    "4) Extract negative patches at different scales, possibly positive ones\n",
    "\n",
    "   as well. There is no guarantee that the faces will appear in the same\n",
    "   \n",
    "   size/shape as in the training set.\n",
    "   \n",
    "****************************\n",
    "\n",
    "5) Create a validation set, see where this simple approach fails and\n",
    "\n",
    "   try to improve those cases.\n",
    "****************************\n",
    "   \n",
    "6) Perturb the bounding boxes/patches around the face to allow some slack.\n",
    "\n",
    "   Even if a detector does not return the exact bounding box, it might still\n",
    "   \n",
    "   be a face, so allow the algorithm to consider such perturbations as positive.\n",
    "****************************\n",
    "\n",
    "7) Get a random patch from each negative image, not a fixed one. You might\n",
    "\n",
    "   even consider several patches, e.g. 50, per image."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from os.path import isdir\n",
	"import numpy as np\n",
	"\n",
	"# menpo provides the image importers used in the rest of the notebook.\n",
	"try:\n",
	"    from menpo.io import import_images, import_builtin_asset\n",
	"except ImportError:\n",
	"    # the trailing space after the URL keeps it apart from the rest of\n",
	"    # the sentence once the two literals are concatenated.\n",
	"    m2 = ('The menpo package from http://www.menpo.org/ '\n",
	"          'is required, please install it.')\n",
	"    print(m2)\n",
	"\n",
	"\n",
	"# resize_all_images is used later to bring all samples to the same size.\n",
	"try:\n",
	"    from research_pyutils import resize_all_images\n",
	"except ImportError:\n",
	"    m1 = ('The library of pyutils is not installed, so '\n",
	"          'please download it from: \\n'\n",
	"          'https://github.com/grigorisg9gr/pyutils')\n",
	"    print(m1)\n",
	"\n",
	"# inline plotting is optional; presumably the NameError guard covers the\n",
	"# case where the code runs outside IPython (e.g. as an exported script).\n",
	"try:\n",
	"    %matplotlib inline\n",
	"except NameError:\n",
	"    print('Probably a terminal, it\\'s not required.')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Get the input samples, pre-process them"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# path for the positive samples (human faces).\n",
	"# Images from the '300 faces in-the-wild' challenge.\n",
	"p_faces = '/vol/atlas/databases/300w/'\n",
	"# path for the negative samples.\n",
	"# The data are from the PASCAL dataset, those that do not include\n",
	"# any type of human/face annotation.\n",
	"p_neg = '/vol/atlas/homes/pts08/non_person_images/'\n",
	"# check each folder on its own, so that a failure names the missing one.\n",
	"assert isdir(p_faces), 'Positive samples folder not found: {}'.format(p_faces)\n",
	"assert isdir(p_neg), 'Negative samples folder not found: {}'.format(p_neg)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# load the 'positive' samples (faces); max_images caps them at 200.\n",
	"ims = import_images(p_faces, max_images=200, verbose=True)\n",
	"# pre-processing: crop every image around its landmarks.\n",
	"pos_ims = [face.crop_to_landmarks_proportion(0.2) for face in ims]\n",
	"# the shape of the first cropped face is kept as the size of the\n",
	"# patch (a rectangular part of the image) to extract later.\n",
	"shape = pos_ims[0].shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# import the 'negative' samples.\n",
	"neg_ims = import_images(p_neg, max_images=200, verbose=True)\n",
	"# get a patch from the top-left corner of each image, in the same shape\n",
	"# as the positive samples. Per menpo's documentation,\n",
	"# constrain_to_boundary=True snaps the patch to the image borders instead\n",
	"# of raising an error when an image is smaller than the requested patch;\n",
	"# all the images are resized to a common size in a later step anyway.\n",
	"neg_ims = [im.crop((0, 0), shape, constrain_to_boundary=True)\n",
	"           for im in neg_ims]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# join the two lists; the positives come first, hence their count is\n",
	"# enough to tell the two classes apart afterwards.\n",
	"images = pos_ims + neg_ims\n",
	"nr_positives = len(pos_ims)\n",
	"# bring all the images into the same size.\n",
	"images = resize_all_images(images)\n",
	"# update the reference shape with the one after the resizing.\n",
	"shape = images[0].shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# one last step: get only the pixel values.\n",
	"pixels = []\n",
	"for im in images:\n",
	"    # the conversion belongs to the 'if' body, while the append runs for\n",
	"    # every image; the nesting below makes that explicit.\n",
	"    if im.n_channels == 3:\n",
	"        im = im.as_greyscale()\n",
	"    pixels.append(im.pixels_with_channels_at_back())\n",
	"\n",
	"# convert the pixels into a numpy array and reshape it to 2d (one\n",
	"# flattened image per row) for scikit learn; most of their learning\n",
	"# models expect 2d numpy arrays.\n",
	"pixels = np.reshape(np.array(pixels, dtype=np.float32), (len(pixels), -1))\n",
	"# Create the labels: 1 for the positives (the first rows), 0 for the rest.\n",
	"labels = np.zeros((len(images)), dtype=np.float32)\n",
	"labels[:nr_positives] = 1"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Model learning"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# shuffle the arrays (per rows); the same permutation is applied to the\n",
	"# samples and to the labels, so every row keeps its own label.\n",
	"# A fixed seed makes the shuffling reproducible from run to run.\n",
	"rng = np.random.RandomState(42)\n",
	"p = rng.permutation(pixels.shape[0])\n",
	"pixels = pixels[p]\n",
	"labels = labels[p]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# The model picked from scikit learn is a simple Linear SVM; any other\n",
	"# type of model can be chosen instead, their documentation includes\n",
	"# several examples from different application areas.\n",
	"from sklearn import svm\n",
	"print('\\nTraining SVM')\n",
	"svm_model = svm.LinearSVC(verbose=1)\n",
	"# fit on the flattened pixels and keep the trained classifier.\n",
	"cl1 = svm_model.fit(pixels, labels)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Validate your model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# load the built-in 'takeo' asset as a greyscale image.\n",
	"val_im = import_builtin_asset.takeo_ppm().as_greyscale()\n",
	"# crop around the landmarks and resize to the shape of the training\n",
	"# samples, so that the dimensionality matches the trained classifier.\n",
	"val_im = val_im.crop_to_landmarks_proportion(0.4)\n",
	"val_im = val_im.resize(shape)\n",
	"# flatten the pixels into a single row, i.e. one sample.\n",
	"val_px = (val_im.pixels_with_channels_at_back(out_dtype=np.float32)\n",
	"          .reshape((1, -1)))\n",
	"decision = cl1.predict(val_px)\n",
	"# the label 1 was assigned to the faces during training.\n",
	"if decision[0] == 1:\n",
	"    msg = ''\n",
	"else:\n",
	"    msg = ' not'\n",
	"m1 = 'The classifier decided that it is{} a face.'\n",
	"print(m1.format(msg))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# view your validated sample.\n",
	"try:\n",
	"    val_im.view()\n",
	"except Exception as err:\n",
	"    # e.g. in a case of a terminal, this won't probably show; report the\n",
	"    # reason instead of hiding it. Catching Exception (not a bare except)\n",
	"    # lets KeyboardInterrupt/SystemExit propagate.\n",
	"    print('Could not display the image: {}'.format(err))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Improvements"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Several improvements to the simple approach above would be:\n",
	"****************************\n",
	"****************************\n",
	"\n",
	"1) Extract features instead of feeding the pixel intensities right away.\n",
	"****************************\n",
	"\n",
	"2) Extract patches around landmark points (just visualise one of the\n",
	"\n",
	" positive images with ims[0].view_landmarks() to understand). The\n",
	" \n",
	" motivation is that the patches do not include as much noise as the\n",
	" \n",
	" whole crude bounding box around the face.\n",
	"****************************\n",
	" \n",
	"3) Try different learning methods, scikit-learn includes plenty of them.\n",
	"****************************\n",
	"\n",
	"4) Extract negative patches at different scales, possibly positive ones\n",
	"\n",
	" as well. There is no guarantee that the faces will appear in the same\n",
	" \n",
	" size/shape as in the training set.\n",
	" \n",
	"****************************\n",
	"\n",
	"5) Create a validation set, see where this simple approach fails and\n",
	"\n",
	" try to improve those cases.\n",
	"****************************\n",
	" \n",
	"6) Perturb the bounding boxes/patches around the face to allow some slack.\n",
	"\n",
	" Even if a detector does not return the exact bounding box, it might still\n",
	" \n",
	" be a face, so allow the algorithm to consider such perturbations as positive.\n",
	"****************************\n",
	"\n",
	"7) Get a random patch from each negative image, not a fixed one. You might\n",
	"\n",
	" even consider several patches, e.g. 50, per image."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.4.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}