abidrahmank/dilatedConvolution.ipynb

## dilatedConvolution.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import sys\n",
    "sys.path.insert(0, '/home/auviz23/Repos/caffe/python')\n",
    "import caffe\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  Caffe Methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Instantiate the network described by the deploy prototxt in the TEST phase\n",
    "# and keep handles to its 'data' and 'conv1' blobs for the cells that follow.\n",
    "net = caffe.Net('dilatedConv_deploy.prototxt', caffe.TEST)\n",
    "src, dst = net.blobs['data'], net.blobs['conv1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Test data: a 9x9 ramp (0..80) and a 3x3 ramp (0..8), each stored as a\n",
    "# 4-D array with two leading singleton axes.\n",
    "x = np.arange(81).reshape(1, 1, 9, 9)\n",
    "y = np.arange(9).reshape(1, 1, 3, 3)  # alternative kernel: np.ones((1, 1, 3, 3))\n",
    "\n",
    "# Copy the kernel into the first parameter blob of 'conv1' and the ramp\n",
    "# into the 'data' blob.\n",
    "net.params['conv1'][0].data[...] = y\n",
    "src.data[...] = x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Blob:  data (1, 9, 9)\n",
      "Blob:  conv1 (1, 3, 3)\n",
      "Filter:  conv1 (1, 3, 3)\n"
     ]
    }
   ],
   "source": [
    "# Shape of the first item (leading axis dropped) of every blob in the net\n",
    "for name, blob in net.blobs.items():\n",
    "    print \"Blob: \", name, blob.data[0].shape\n",
    "\n",
    "# Same for the first parameter blob of every entry in net.params\n",
    "for name, layer_params in net.params.items():\n",
    "    print \"Filter: \", name, layer_params[0].data[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Input: \n",
      "[[  0.   1.   2.   3.   4.   5.   6.   7.   8.]\n",
      " [  9.  10.  11.  12.  13.  14.  15.  16.  17.]\n",
      " [ 18.  19.  20.  21.  22.  23.  24.  25.  26.]\n",
      " [ 27.  28.  29.  30.  31.  32.  33.  34.  35.]\n",
      " [ 36.  37.  38.  39.  40.  41.  42.  43.  44.]\n",
      " [ 45.  46.  47.  48.  49.  50.  51.  52.  53.]\n",
      " [ 54.  55.  56.  57.  58.  59.  60.  61.  62.]\n",
      " [ 63.  64.  65.  66.  67.  68.  69.  70.  71.]\n",
      " [ 72.  73.  74.  75.  76.  77.  78.  79.  80.]] \n",
      "\n",
      "Filter: \n",
      "[[0 1 2]\n",
      " [3 4 5]\n",
      " [6 7 8]] \n",
      "\n",
      "Output: \n",
      "[[ 1584.  1620.  1656.]\n",
      " [ 1908.  1944.  1980.]\n",
      " [ 2232.  2268.  2304.]] \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Run the forward pass, then print the input plane, the kernel array `y`\n",
    "# and the plane held by the 'conv1' blob, each followed by a blank line.\n",
    "out = net.forward()\n",
    "for title, plane in ((\"Input: \\n\", src.data[0, 0]),\n",
    "                     (\"Filter: \\n\", y[0, 0]),\n",
    "                     (\"Output: \\n\", dst.data[0, 0])):\n",
    "    print title, plane, \"\\n\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Summary:** The idea is very simple. There is a new **dilation** parameter for convolution in Caffe. By default, **dilation = 1**, which is the ordinary convolution.\n",
    "\n",
    "If **dilation = 2**, **one zero is inserted** between neighbouring filter elements. For example, a 3x3 kernel becomes 5x5 (because there is one zero between all the filter elements). The convolution is then performed with this new 5x5 kernel.\n",
    "\n",
    "If **dilation = 3**, **two zeros are inserted** between neighbouring filter elements, so a 3x3 kernel becomes a 7x7 kernel. In general, a $k \\times k$ kernel with dilation $d$ spans $(k-1)d + 1$ elements along each side."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vanilla Python Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Weights :\n",
      "[[ 0.  0.  0.  1.  0.  0.  2.]\n",
      " [ 0.  0.  0.  0.  0.  0.  0.]\n",
      " [ 0.  0.  0.  0.  0.  0.  0.]\n",
      " [ 3.  0.  0.  4.  0.  0.  5.]\n",
      " [ 0.  0.  0.  0.  0.  0.  0.]\n",
      " [ 0.  0.  0.  0.  0.  0.  0.]\n",
      " [ 6.  0.  0.  7.  0.  0.  8.]]\n",
      "Output : \n",
      "[[ 1584.  1620.  1656.]\n",
      " [ 1908.  1944.  1980.]\n",
      " [ 2232.  2268.  2304.]]\n"
     ]
    }
   ],
   "source": [
    "from scipy.signal import correlate2d  # Use correlation for straight convolution\n",
    "\n",
    "def convolution(img, wts, pad = 0, dilation = 1):\n",
    "    \"\"\"Stride-1 2D dilated convolution of `img` with `wts`.\n",
    "\n",
    "    The kernel is applied without flipping (cross-correlation).\n",
    "\n",
    "    img      -- 2D array, the input plane.\n",
    "    wts      -- 2D array, the compact kernel; it may be non-square.\n",
    "    pad      -- number of zero rows/columns added on every side of `img`\n",
    "                before the kernel is applied (0 leaves `img` untouched).\n",
    "    dilation -- spacing between kernel taps: 1 keeps the kernel as is,\n",
    "                d inserts d-1 zeros between neighbouring taps.\n",
    "\n",
    "    Prints the zero-filled kernel and returns the 'valid' correlation of\n",
    "    the (padded) image with it. Raises ValueError when dilation < 1 or\n",
    "    pad < 0.\n",
    "    \"\"\"\n",
    "    if dilation < 1:\n",
    "        raise ValueError('dilation must be >= 1')\n",
    "    if pad < 0:\n",
    "        raise ValueError('pad must be >= 0')\n",
    "\n",
    "    wr, wc = wts.shape\n",
    "    # Each axis is sized on its own as (k - 1) * dilation + 1, so\n",
    "    # rectangular kernels work as well as square ones.\n",
    "    newWts = np.zeros(((wr - 1) * dilation + 1, (wc - 1) * dilation + 1))\n",
    "    newWts[::dilation, ::dilation] = wts\n",
    "    print \"Weights :\\n\", newWts\n",
    "\n",
    "    if pad:\n",
    "        # Surround the image with a border of `pad` zeros\n",
    "        img = np.pad(img, pad, mode='constant')\n",
    "    return correlate2d(img, newWts, mode='valid')\n",
    "\n",
    "result = convolution (x[0, 0], y[0, 0], dilation = 3)\n",
    "print  \"Output : \\n\", result"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"import sys\n",
	"sys.path.insert(0, '/home/auviz23/Repos/caffe/python')\n",
	"import caffe\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Caffe Methods"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Instantiate the network described by the deploy prototxt in the TEST phase\n",
	"# and keep handles to its 'data' and 'conv1' blobs for the cells that follow.\n",
	"net = caffe.Net('dilatedConv_deploy.prototxt', caffe.TEST)\n",
	"src, dst = net.blobs['data'], net.blobs['conv1']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Test data: a 9x9 ramp (0..80) and a 3x3 ramp (0..8), each stored as a\n",
	"# 4-D array with two leading singleton axes.\n",
	"x = np.arange(81).reshape(1, 1, 9, 9)\n",
	"y = np.arange(9).reshape(1, 1, 3, 3)  # alternative kernel: np.ones((1, 1, 3, 3))\n",
	"\n",
	"# Copy the kernel into the first parameter blob of 'conv1' and the ramp\n",
	"# into the 'data' blob.\n",
	"net.params['conv1'][0].data[...] = y\n",
	"src.data[...] = x"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Blob: data (1, 9, 9)\n",
	"Blob: conv1 (1, 3, 3)\n",
	"Filter: conv1 (1, 3, 3)\n"
	]
	}
	],
	"source": [
	"# Shape of the first item (leading axis dropped) of every blob in the net\n",
	"for name, blob in net.blobs.items():\n",
	"    print \"Blob: \", name, blob.data[0].shape\n",
	"\n",
	"# Same for the first parameter blob of every entry in net.params\n",
	"for name, layer_params in net.params.items():\n",
	"    print \"Filter: \", name, layer_params[0].data[0].shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Input: \n",
	"[[ 0. 1. 2. 3. 4. 5. 6. 7. 8.]\n",
	" [ 9. 10. 11. 12. 13. 14. 15. 16. 17.]\n",
	" [ 18. 19. 20. 21. 22. 23. 24. 25. 26.]\n",
	" [ 27. 28. 29. 30. 31. 32. 33. 34. 35.]\n",
	" [ 36. 37. 38. 39. 40. 41. 42. 43. 44.]\n",
	" [ 45. 46. 47. 48. 49. 50. 51. 52. 53.]\n",
	" [ 54. 55. 56. 57. 58. 59. 60. 61. 62.]\n",
	" [ 63. 64. 65. 66. 67. 68. 69. 70. 71.]\n",
	" [ 72. 73. 74. 75. 76. 77. 78. 79. 80.]] \n",
	"\n",
	"Filter: \n",
	"[[0 1 2]\n",
	" [3 4 5]\n",
	" [6 7 8]] \n",
	"\n",
	"Output: \n",
	"[[ 1584. 1620. 1656.]\n",
	" [ 1908. 1944. 1980.]\n",
	" [ 2232. 2268. 2304.]] \n",
	"\n"
	]
	}
	],
	"source": [
	"# Run the forward pass, then print the input plane, the kernel array `y`\n",
	"# and the plane held by the 'conv1' blob, each followed by a blank line.\n",
	"out = net.forward()\n",
	"for title, plane in ((\"Input: \\n\", src.data[0, 0]),\n",
	"                     (\"Filter: \\n\", y[0, 0]),\n",
	"                     (\"Output: \\n\", dst.data[0, 0])):\n",
	"    print title, plane, \"\\n\""
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Summary: The idea is very simple. There is a new dilation parameter for convolution in Caffe. By default, dilation = 1, which is the ordinary convolution.\n",
	"\n",
	"If dilation = 2, one zero is inserted between neighbouring filter elements. For example, a 3x3 kernel becomes 5x5 (because there is one zero between all the filter elements). The convolution is then performed with this new 5x5 kernel.\n",
	"\n",
	"If dilation = 3, two zeros are inserted between neighbouring filter elements, so a 3x3 kernel becomes a 7x7 kernel. In general, a $k \\times k$ kernel with dilation $d$ spans $(k-1)d + 1$ elements along each side."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Vanilla Python Implementation"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 39,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Weights :\n",
	"[[ 0. 0. 0. 1. 0. 0. 2.]\n",
	" [ 0. 0. 0. 0. 0. 0. 0.]\n",
	" [ 0. 0. 0. 0. 0. 0. 0.]\n",
	" [ 3. 0. 0. 4. 0. 0. 5.]\n",
	" [ 0. 0. 0. 0. 0. 0. 0.]\n",
	" [ 0. 0. 0. 0. 0. 0. 0.]\n",
	" [ 6. 0. 0. 7. 0. 0. 8.]]\n",
	"Output : \n",
	"[[ 1584. 1620. 1656.]\n",
	" [ 1908. 1944. 1980.]\n",
	" [ 2232. 2268. 2304.]]\n"
	]
	}
	],
	"source": [
	"from scipy.signal import correlate2d # Use correlation for straight convolution\n",
	"\n",
	"def convolution(img, wts, pad = 0, dilation = 1):\n",
	"    \"\"\"Stride-1 2D dilated convolution of `img` with `wts`.\n",
	"\n",
	"    The kernel is applied without flipping (cross-correlation).\n",
	"\n",
	"    img      -- 2D array, the input plane.\n",
	"    wts      -- 2D array, the compact kernel; it may be non-square.\n",
	"    pad      -- number of zero rows/columns added on every side of `img`\n",
	"                before the kernel is applied (0 leaves `img` untouched).\n",
	"    dilation -- spacing between kernel taps: 1 keeps the kernel as is,\n",
	"                d inserts d-1 zeros between neighbouring taps.\n",
	"\n",
	"    Prints the zero-filled kernel and returns the 'valid' correlation of\n",
	"    the (padded) image with it. Raises ValueError when dilation < 1 or\n",
	"    pad < 0.\n",
	"    \"\"\"\n",
	"    if dilation < 1:\n",
	"        raise ValueError('dilation must be >= 1')\n",
	"    if pad < 0:\n",
	"        raise ValueError('pad must be >= 0')\n",
	"\n",
	"    wr, wc = wts.shape\n",
	"    # Each axis is sized on its own as (k - 1) * dilation + 1, so\n",
	"    # rectangular kernels work as well as square ones.\n",
	"    newWts = np.zeros(((wr - 1) * dilation + 1, (wc - 1) * dilation + 1))\n",
	"    newWts[::dilation, ::dilation] = wts\n",
	"    print \"Weights :\\n\", newWts\n",
	"\n",
	"    if pad:\n",
	"        # Surround the image with a border of `pad` zeros\n",
	"        img = np.pad(img, pad, mode='constant')\n",
	"    return correlate2d(img, newWts, mode='valid')\n",
	"\n",
	"result = convolution (x[0, 0], y[0, 0], dilation = 3)\n",
	"print \"Output : \\n\", result"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.11"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}