jkjung-avt/SSD_layer_statistics.ipynb

## find_SSD_detection_layer.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Perform detective work to find layer and features for a specific image and label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It may be useful to have the network diagram available as you trace backwards through the network."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set up"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Imports that do not depend on caffe_root come first\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# Set caffe root, label map, model definition, and model weights.\n",
    "# The CAFFE_ROOT environment variable overrides the default checkout location\n",
    "# (e.g. CAFFE_ROOT=.. when this file lives in {caffe_root}/examples).\n",
    "# The three paths below are relative to caffe_root; this cell chdir()s into it.\n",
    "caffe_root = os.environ.get('CAFFE_ROOT', '/work/caffe')\n",
    "voc_labelmap_file = 'data/VOC0712/labelmap_voc.prototxt'\n",
    "model_def = 'models/VGGNet/VOC0712/SSD_300x300/test.prototxt'\n",
    "model_weights = 'models/VGGNet/VOC0712/SSD_300x300/VGG_VOC0712_SSD_300x300_iter_60000.caffemodel'\n",
    "\n",
    "# Set confidence threshold (0-1) for object detection\n",
    "conf_thresh = 0.6\n",
    "\n",
    "# Set index of image of interest\n",
    "image_index = 0\n",
    "\n",
    "plt.rcParams['figure.figsize'] = (10, 10)\n",
    "plt.rcParams['image.interpolation'] = 'nearest'\n",
    "plt.rcParams['image.cmap'] = 'gray'\n",
    "\n",
    "# Make sure that caffe is on the python path.\n",
    "# The absolute path keeps the import working even if the cwd changes later.\n",
    "os.chdir(caffe_root)\n",
    "sys.path.insert(0, os.path.abspath('python'))\n",
    "\n",
    "import caffe\n",
    "caffe.set_device(0)\n",
    "caffe.set_mode_gpu()\n",
    "from google.protobuf import text_format\n",
    "from caffe.proto import caffe_pb2\n",
    "\n",
    "# load PASCAL VOC labels\n",
    "# (the with-block closes the handle and avoids shadowing the Python 2 builtin `file`)\n",
    "with open(voc_labelmap_file, 'r') as labelmap_fh:\n",
    "    voc_labelmap = caffe_pb2.LabelMap()\n",
    "    text_format.Merge(str(labelmap_fh.read()), voc_labelmap)\n",
    "\n",
    "def get_labelname(labelmap, labels):\n",
    "    \"\"\"Map numeric label id(s) to display names.\n",
    "\n",
    "    labelmap: object whose `.item` entries expose `.label` and `.display_name`\n",
    "              (here the caffe_pb2.LabelMap loaded above).\n",
    "    labels:   a single label id, or a list/tuple of ids.\n",
    "\n",
    "    Returns a list with one display name per requested id, in the same order.\n",
    "    Raises AssertionError when an id has no entry in labelmap.\n",
    "    \"\"\"\n",
    "    if not isinstance(labels, (list, tuple)):\n",
    "        labels = [labels]\n",
    "    labelnames = []\n",
    "    for label in labels:\n",
    "        # linear scan; the first entry with a matching label wins\n",
    "        for item in labelmap.item:\n",
    "            if label == item.label:\n",
    "                labelnames.append(item.display_name)\n",
    "                break\n",
    "        else:\n",
    "            # raised explicitly so the check is not stripped by `python -O`\n",
    "            raise AssertionError('label %r not found in labelmap' % (label,))\n",
    "    return labelnames\n",
    "\n",
    "net = caffe.Net(model_def,      # defines the structure of the model\n",
    "                model_weights,  # contains the trained weights\n",
    "                caffe.TEST)     # use test mode (e.g., don't perform dropout)\n",
    "\n",
    "# input preprocessing: 'data' is the name of the input blob == net.inputs[0]\n",
    "# NOTE: the cells below never call `transformer`; they only run net.forward().\n",
    "transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\n",
    "transformer.set_transpose('data', (2, 0, 1))\n",
    "transformer.set_mean('data', np.array([104,117,123])) # mean pixel\n",
    "transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]\n",
    "transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Iterate to image of interest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# iterate to image of interest: forward() is called image_index + 1 times.\n",
    "# NOTE(review): re-running this cell calls forward() again, so it presumably moves on\n",
    "# to later images - re-run the setup cell first to get back to image_index.\n",
    "for _pass in range(image_index + 1):\n",
    "    net.forward()\n",
    "\n",
    "detections = net.blobs['detection_out'].data\n",
    "\n",
    "# Parse the outputs: columns 1..6 of each detection row are read as\n",
    "# label, confidence, xmin, ymin, xmax, ymax.\n",
    "det_label = detections[0,0,:,1]\n",
    "det_conf = detections[0,0,:,2]\n",
    "det_xmin = detections[0,0,:,3]\n",
    "det_ymin = detections[0,0,:,4]\n",
    "det_xmax = detections[0,0,:,5]\n",
    "det_ymax = detections[0,0,:,6]\n",
    "\n",
    "# Keep detections whose confidence is at least conf_thresh\n",
    "# (vectorized; still a plain list of int row indices).\n",
    "top_indices = np.flatnonzero(det_conf >= conf_thresh).tolist()\n",
    "\n",
    "top_conf = det_conf[top_indices]\n",
    "top_label_indices = det_label[top_indices].tolist()\n",
    "top_labels = get_labelname(voc_labelmap, top_label_indices)\n",
    "top_xmin = det_xmin[top_indices]\n",
    "top_ymin = det_ymin[top_indices]\n",
    "top_xmax = det_xmax[top_indices]\n",
    "top_ymax = det_ymax[top_indices]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Print number of high confidence objects in image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num el: 1\n"
     ]
    }
   ],
   "source": [
    "# number of detections that passed the confidence threshold\n",
    "numel = len(top_conf)\n",
    "print \"num el:\", numel"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find layer responsible for object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "object 0 of 0\n",
      "object confidence: 0.999378\n",
      "predicted label: person\n",
      "\n",
      "softmax layer shape: (1, 7308, 21)\n",
      "index in softmax layer is (array([0]), array([7273]), array([15]))\n",
      "\n",
      "output confidence came from layer conv8_2_mbox_conf_perm\n",
      "\n",
      "permuted later shape: (1, 3, 3, 126)\n",
      "index in permuted is     (array([0]), array([1]), array([1]), array([36]))\n",
      "index before premuted is [array([0]), array([36]), array([1]), array([1])]\n",
      "\n",
      "--Layer With Image Features--\n",
      "name of conv/pool/fc conf layer is conv8_2_mbox_conf\n",
      "size of conf layer is (1, 126, 3, 3)\n",
      "image features found at indeces (0,36,1,1)\n"
     ]
    }
   ],
   "source": [
    "# Set k to the object of interest\n",
    "if numel == 0:\n",
    "    # without this guard k would be -1 and top_conf[k] fails with an opaque IndexError\n",
    "    raise ValueError('no detection reached conf_thresh; lower conf_thresh or choose another image_index')\n",
    "k = numel - 1    # numel - 1 = most confident\n",
    "# NOTE(review): 'most confident' depends on how detection_out orders its rows - confirm\n",
    "\n",
    "top_conf_k = top_conf[k]\n",
    "print \"object\", str(k),  \"of\", str(numel-1)\n",
    "print \"object confidence:\", str(top_conf_k)\n",
    "print \"predicted label:\",  top_labels[k]\n",
    "print\n",
    "\n",
    "# find index of this confidence in output of mbox_conf_softmax (exact float match).\n",
    "# Only the first hit is kept, so every index below stays a length-1 array even\n",
    "# when the same value occurs more than once.\n",
    "conf_softmax = net.blobs['mbox_conf_softmax'].data\n",
    "tconf_softmax_idx = tuple(hits[:1] for hits in np.where(conf_softmax == top_conf_k))\n",
    "if tconf_softmax_idx[0].size == 0:\n",
    "    raise ValueError('confidence %r not found in mbox_conf_softmax' % (top_conf_k,))\n",
    "print \"softmax layer shape:\", conf_softmax.shape\n",
    "print \"index in softmax layer is \" + str(tconf_softmax_idx)\n",
    "print\n",
    "\n",
    "# use index from  mbox_conf_softmax to find value in mbox_conf_reshape\n",
    "conf_reshape = net.blobs['mbox_conf_reshape'].data\n",
    "tconf_reshape = conf_reshape[tconf_softmax_idx[0], tconf_softmax_idx[1], tconf_softmax_idx[2]]\n",
    "\n",
    "# use value from mbox_conf_reshape to find input layer (after perm) and index of layer\n",
    "layers = [\"conv4_3_norm_mbox_conf_perm\", \"fc7_mbox_conf_perm\", \"conv6_2_mbox_conf_perm\", \"conv7_2_mbox_conf_perm\", \"conv8_2_mbox_conf_perm\",  \"pool6_mbox_conf_perm\"]\n",
    "\n",
    "# reset first so a failed search cannot silently reuse results from an earlier run of this cell\n",
    "conf_perm = None\n",
    "conf_perm_name = None\n",
    "for layer in layers:\n",
    "    layer_data = net.blobs[layer].data\n",
    "    if tconf_reshape in layer_data:\n",
    "        # no break: if several layers contain the value, the last one listed is used\n",
    "        conf_perm = layer_data\n",
    "        conf_perm_name = layer\n",
    "if conf_perm is None:\n",
    "    raise ValueError('value %r not found in any candidate layer: %s' % (tconf_reshape, ', '.join(layers)))\n",
    "\n",
    "# first hit only, for the same reason as above\n",
    "conf_perm_idx = tuple(hits[:1] for hits in np.where(conf_perm == tconf_reshape))\n",
    "\n",
    "print \"output confidence came from layer \" + conf_perm_name\n",
    "print\n",
    "print \"permuted later shape:\", conf_perm.shape\n",
    "print \"index in permuted is     \" + str(conf_perm_idx)\n",
    "\n",
    "# get index of data before permutation using prototxt info (0,1,2,3 -> 0,2,3,1)\n",
    "# this is the index after convolving/pooling/fully connected layer\n",
    "conf_idx = [conf_perm_idx[0], conf_perm_idx[3], conf_perm_idx[1], conf_perm_idx[2]]\n",
    "print \"index before premuted is \" + str(conf_idx)\n",
    "print\n",
    "# get data from layer after conv/pool/fc (same blob name without the trailing '_perm')\n",
    "conf_post_name = conf_perm_name[:-5]\n",
    "conf_post = net.blobs[conf_post_name].data\n",
    "\n",
    "# get bin high conf is found in: channel index floor-divided by the class count.\n",
    "# Assumes the softmax blob's last axis is the per-box class count (21 in the saved\n",
    "# output, the value that used to be hard-coded here) - TODO confirm.\n",
    "# Named prior_bin so the builtin bin() is no longer shadowed.\n",
    "num_classes = conf_softmax.shape[2]\n",
    "prior_bin = conf_idx[1] // num_classes\n",
    "\n",
    "# IMAGE FEATURE LAYER AND INDICES\n",
    "print \"--Layer With Image Features--\"\n",
    "print \"name of conv/pool/fc conf layer is \" + conf_post_name\n",
    "tconf_post = conf_post[conf_idx[0], conf_idx[1], conf_idx[2], conf_idx[3]]\n",
    "print \"size of conf layer is \" + str(conf_post.shape)\n",
    "print \"image features found at indeces (\" + \",\".join(str(int(axis_idx)) for axis_idx in conf_idx) + \")\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

## read_SSD_lmdb.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              read_SSD_lmdb.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## SSD_layer_statistics.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              SSD_layer_statistics.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Perform detective work to find layer and features for a specific image and label"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"It may be useful to have the network diagram available as you trace backwards through the network."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Set up"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Imports that do not depend on caffe_root come first\n",
	"import os\n",
	"import sys\n",
	"\n",
	"import numpy as np\n",
	"import matplotlib.pyplot as plt\n",
	"%matplotlib inline\n",
	"\n",
	"# Set caffe root, label map, model definition, and model weights.\n",
	"# The CAFFE_ROOT environment variable overrides the default checkout location\n",
	"# (e.g. CAFFE_ROOT=.. when this file lives in {caffe_root}/examples).\n",
	"# The three paths below are relative to caffe_root; this cell chdir()s into it.\n",
	"caffe_root = os.environ.get('CAFFE_ROOT', '/work/caffe')\n",
	"voc_labelmap_file = 'data/VOC0712/labelmap_voc.prototxt'\n",
	"model_def = 'models/VGGNet/VOC0712/SSD_300x300/test.prototxt'\n",
	"model_weights = 'models/VGGNet/VOC0712/SSD_300x300/VGG_VOC0712_SSD_300x300_iter_60000.caffemodel'\n",
	"\n",
	"# Set confidence threshold (0-1) for object detection\n",
	"conf_thresh = 0.6\n",
	"\n",
	"# Set index of image of interest\n",
	"image_index = 0\n",
	"\n",
	"plt.rcParams['figure.figsize'] = (10, 10)\n",
	"plt.rcParams['image.interpolation'] = 'nearest'\n",
	"plt.rcParams['image.cmap'] = 'gray'\n",
	"\n",
	"# Make sure that caffe is on the python path.\n",
	"# The absolute path keeps the import working even if the cwd changes later.\n",
	"os.chdir(caffe_root)\n",
	"sys.path.insert(0, os.path.abspath('python'))\n",
	"\n",
	"import caffe\n",
	"caffe.set_device(0)\n",
	"caffe.set_mode_gpu()\n",
	"from google.protobuf import text_format\n",
	"from caffe.proto import caffe_pb2\n",
	"\n",
	"# load PASCAL VOC labels\n",
	"# (the with-block closes the handle and avoids shadowing the Python 2 builtin `file`)\n",
	"with open(voc_labelmap_file, 'r') as labelmap_fh:\n",
	"    voc_labelmap = caffe_pb2.LabelMap()\n",
	"    text_format.Merge(str(labelmap_fh.read()), voc_labelmap)\n",
	"\n",
	"def get_labelname(labelmap, labels):\n",
	"    \"\"\"Map numeric label id(s) to display names.\n",
	"\n",
	"    labelmap: object whose `.item` entries expose `.label` and `.display_name`\n",
	"              (here the caffe_pb2.LabelMap loaded above).\n",
	"    labels:   a single label id, or a list/tuple of ids.\n",
	"\n",
	"    Returns a list with one display name per requested id, in the same order.\n",
	"    Raises AssertionError when an id has no entry in labelmap.\n",
	"    \"\"\"\n",
	"    if not isinstance(labels, (list, tuple)):\n",
	"        labels = [labels]\n",
	"    labelnames = []\n",
	"    for label in labels:\n",
	"        # linear scan; the first entry with a matching label wins\n",
	"        for item in labelmap.item:\n",
	"            if label == item.label:\n",
	"                labelnames.append(item.display_name)\n",
	"                break\n",
	"        else:\n",
	"            # raised explicitly so the check is not stripped by `python -O`\n",
	"            raise AssertionError('label %r not found in labelmap' % (label,))\n",
	"    return labelnames\n",
	"\n",
	"net = caffe.Net(model_def,      # defines the structure of the model\n",
	"                model_weights,  # contains the trained weights\n",
	"                caffe.TEST)     # use test mode (e.g., don't perform dropout)\n",
	"\n",
	"# input preprocessing: 'data' is the name of the input blob == net.inputs[0]\n",
	"# NOTE: the cells below never call `transformer`; they only run net.forward().\n",
	"transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\n",
	"transformer.set_transpose('data', (2, 0, 1))\n",
	"transformer.set_mean('data', np.array([104,117,123])) # mean pixel\n",
	"transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]\n",
	"transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Iterate to image of interest"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# iterate to image of interest: forward() is called image_index + 1 times.\n",
	"# NOTE(review): re-running this cell calls forward() again, so it presumably moves on\n",
	"# to later images - re-run the setup cell first to get back to image_index.\n",
	"for _pass in range(image_index + 1):\n",
	"    net.forward()\n",
	"\n",
	"detections = net.blobs['detection_out'].data\n",
	"\n",
	"# Parse the outputs: columns 1..6 of each detection row are read as\n",
	"# label, confidence, xmin, ymin, xmax, ymax.\n",
	"det_label = detections[0,0,:,1]\n",
	"det_conf = detections[0,0,:,2]\n",
	"det_xmin = detections[0,0,:,3]\n",
	"det_ymin = detections[0,0,:,4]\n",
	"det_xmax = detections[0,0,:,5]\n",
	"det_ymax = detections[0,0,:,6]\n",
	"\n",
	"# Keep detections whose confidence is at least conf_thresh\n",
	"# (vectorized; still a plain list of int row indices).\n",
	"top_indices = np.flatnonzero(det_conf >= conf_thresh).tolist()\n",
	"\n",
	"top_conf = det_conf[top_indices]\n",
	"top_label_indices = det_label[top_indices].tolist()\n",
	"top_labels = get_labelname(voc_labelmap, top_label_indices)\n",
	"top_xmin = det_xmin[top_indices]\n",
	"top_ymin = det_ymin[top_indices]\n",
	"top_xmax = det_xmax[top_indices]\n",
	"top_ymax = det_ymax[top_indices]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Print number of high confidence objects in image"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"num el: 1\n"
	]
	}
	],
	"source": [
	"# number of detections that passed the confidence threshold\n",
	"numel = len(top_conf)\n",
	"print \"num el:\", numel"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Find layer responsible for object"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"object 0 of 0\n",
	"object confidence: 0.999378\n",
	"predicted label: person\n",
	"\n",
	"softmax layer shape: (1, 7308, 21)\n",
	"index in softmax layer is (array([0]), array([7273]), array([15]))\n",
	"\n",
	"output confidence came from layer conv8_2_mbox_conf_perm\n",
	"\n",
	"permuted later shape: (1, 3, 3, 126)\n",
	"index in permuted is (array([0]), array([1]), array([1]), array([36]))\n",
	"index before premuted is [array([0]), array([36]), array([1]), array([1])]\n",
	"\n",
	"--Layer With Image Features--\n",
	"name of conv/pool/fc conf layer is conv8_2_mbox_conf\n",
	"size of conf layer is (1, 126, 3, 3)\n",
	"image features found at indeces (0,36,1,1)\n"
	]
	}
	],
	"source": [
	"# Set k to the object of interest\n",
	"if numel == 0:\n",
	"    # without this guard k would be -1 and top_conf[k] fails with an opaque IndexError\n",
	"    raise ValueError('no detection reached conf_thresh; lower conf_thresh or choose another image_index')\n",
	"k = numel - 1 # numel - 1 = most confident\n",
	"# NOTE(review): 'most confident' depends on how detection_out orders its rows - confirm\n",
	"\n",
	"top_conf_k = top_conf[k]\n",
	"print \"object\", str(k), \"of\", str(numel-1)\n",
	"print \"object confidence:\", str(top_conf_k)\n",
	"print \"predicted label:\", top_labels[k]\n",
	"print\n",
	"\n",
	"# find index of this confidence in output of mbox_conf_softmax (exact float match).\n",
	"# Only the first hit is kept, so every index below stays a length-1 array even\n",
	"# when the same value occurs more than once.\n",
	"conf_softmax = net.blobs['mbox_conf_softmax'].data\n",
	"tconf_softmax_idx = tuple(hits[:1] for hits in np.where(conf_softmax == top_conf_k))\n",
	"if tconf_softmax_idx[0].size == 0:\n",
	"    raise ValueError('confidence %r not found in mbox_conf_softmax' % (top_conf_k,))\n",
	"print \"softmax layer shape:\", conf_softmax.shape\n",
	"print \"index in softmax layer is \" + str(tconf_softmax_idx)\n",
	"print\n",
	"\n",
	"# use index from mbox_conf_softmax to find value in mbox_conf_reshape\n",
	"conf_reshape = net.blobs['mbox_conf_reshape'].data\n",
	"tconf_reshape = conf_reshape[tconf_softmax_idx[0], tconf_softmax_idx[1], tconf_softmax_idx[2]]\n",
	"\n",
	"# use value from mbox_conf_reshape to find input layer (after perm) and index of layer\n",
	"layers = [\"conv4_3_norm_mbox_conf_perm\", \"fc7_mbox_conf_perm\", \"conv6_2_mbox_conf_perm\", \"conv7_2_mbox_conf_perm\", \"conv8_2_mbox_conf_perm\", \"pool6_mbox_conf_perm\"]\n",
	"\n",
	"# reset first so a failed search cannot silently reuse results from an earlier run of this cell\n",
	"conf_perm = None\n",
	"conf_perm_name = None\n",
	"for layer in layers:\n",
	"    layer_data = net.blobs[layer].data\n",
	"    if tconf_reshape in layer_data:\n",
	"        # no break: if several layers contain the value, the last one listed is used\n",
	"        conf_perm = layer_data\n",
	"        conf_perm_name = layer\n",
	"if conf_perm is None:\n",
	"    raise ValueError('value %r not found in any candidate layer: %s' % (tconf_reshape, ', '.join(layers)))\n",
	"\n",
	"# first hit only, for the same reason as above\n",
	"conf_perm_idx = tuple(hits[:1] for hits in np.where(conf_perm == tconf_reshape))\n",
	"\n",
	"print \"output confidence came from layer \" + conf_perm_name\n",
	"print\n",
	"print \"permuted later shape:\", conf_perm.shape\n",
	"print \"index in permuted is \" + str(conf_perm_idx)\n",
	"\n",
	"# get index of data before permutation using prototxt info (0,1,2,3 -> 0,2,3,1)\n",
	"# this is the index after convolving/pooling/fully connected layer\n",
	"conf_idx = [conf_perm_idx[0], conf_perm_idx[3], conf_perm_idx[1], conf_perm_idx[2]]\n",
	"print \"index before premuted is \" + str(conf_idx)\n",
	"print\n",
	"# get data from layer after conv/pool/fc (same blob name without the trailing '_perm')\n",
	"conf_post_name = conf_perm_name[:-5]\n",
	"conf_post = net.blobs[conf_post_name].data\n",
	"\n",
	"# get bin high conf is found in: channel index floor-divided by the class count.\n",
	"# Assumes the softmax blob's last axis is the per-box class count (21 in the saved\n",
	"# output, the value that used to be hard-coded here) - TODO confirm.\n",
	"# Named prior_bin so the builtin bin() is no longer shadowed.\n",
	"num_classes = conf_softmax.shape[2]\n",
	"prior_bin = conf_idx[1] // num_classes\n",
	"\n",
	"# IMAGE FEATURE LAYER AND INDICES\n",
	"print \"--Layer With Image Features--\"\n",
	"print \"name of conv/pool/fc conf layer is \" + conf_post_name\n",
	"tconf_post = conf_post[conf_idx[0], conf_idx[1], conf_idx[2], conf_idx[3]]\n",
	"print \"size of conf layer is \" + str(conf_post.shape)\n",
	"print \"image features found at indeces (\" + \",\".join(str(int(axis_idx)) for axis_idx in conf_idx) + \")\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.12"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}