Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A Jupyter notebook to get the receptive field and effective stride of layers in a CNN. Supports dilated convolutions as well.
{
"cells": [
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import math\n",
"import copy\n",
"from __future__ import division\n",
"\n",
"def layer_params(layer):\n",
"    \"\"\"Return the (kernel size, stride, dilation) triple of a layer dict.\n",
"\n",
"    'k' is required and raises KeyError when absent; 's' and 'd' fall\n",
"    back to 1 when the layer does not specify them.\n",
"    \"\"\"\n",
"    kernel = layer['k']\n",
"    stride = layer.get('s', 1)\n",
"    dilation = layer.get('d', 1)\n",
"    return kernel, stride, dilation\n",
"\n",
"def output_size(i, k, s=1, p=0, d=1):\n",
"    \"\"\"Spatial output size of a convolution / pooling layer.\n",
"\n",
"    i: input size, k: kernel size, s: stride, p: padding (counted twice,\n",
"    i.e. applied on both sides), d: dilation.\n",
"    Evaluates floor((i + 2p - d(k-1) - 1) / s) + 1 and returns it as an int.\n",
"    \"\"\"\n",
"    numerator = i + 2 * p - d * (k - 1) - 1\n",
"    return int(math.floor(numerator / s) + 1)\n",
" \n",
"def find_output_size(network, i=224):\n",
"    \"\"\"Propagate an input size through every layer and return the final size.\n",
"\n",
"    network: sequence of layer dicts whose keys are output_size() keyword\n",
"        arguments ('k', and optionally 's', 'p', 'd').\n",
"    i: spatial size fed into the first layer.\n",
"\n",
"    An empty network returns i unchanged. The layer dicts are not modified.\n",
"    \"\"\"\n",
"    size = i\n",
"    for layer in network:\n",
"        # Build a fresh kwargs dict per layer instead of deep-copying the whole\n",
"        # network; the current size always overrides any stale 'i' entry.\n",
"        size = output_size(**dict(layer, i=size))\n",
"    return size\n",
"\n",
"def find_receptive_field(net):\n",
"    \"\"\"Compute (receptive field, effective stride) after every layer of net.\n",
"\n",
"    Returns a list with one tuple per layer, in layer order. The input is\n",
"    treated as having receptive field 1 and effective stride 1.\n",
"    \"\"\"\n",
"    results = []\n",
"    prev_rf, prev_stride = 1, 1\n",
"    for layer in net:\n",
"        k, s, d = layer_params(layer)\n",
"        # The receptive field grows by the dilated kernel extent, scaled by\n",
"        # the stride accumulated *before* this layer; update it first.\n",
"        prev_rf = prev_rf + d * prev_stride * (k - 1)\n",
"        prev_stride = s * prev_stride\n",
"        results.append((prev_rf, prev_stride))\n",
"    return results\n",
"\n",
"def find_full_info(network_with_names, input_size=224):\n",
"    \"\"\"Print a per-layer table and return the final layer's statistics.\n",
"\n",
"    network_with_names: flat list alternating layer dict and layer name,\n",
"        e.g. [{'k': 3}, 'conv1', {'k': 2, 's': 2}, 'pool1'].\n",
"    input_size: spatial size of the network input.\n",
"\n",
"    Returns (receptive field, effective stride, output size) of the last\n",
"    layer, or (1, 1, input_size) when the network has no layers.\n",
"    \"\"\"\n",
"    structure = network_with_names[0::2]\n",
"    layer_names = network_with_names[1::2]\n",
"    # Single-argument print(...) emits the same text whether print is a\n",
"    # statement (Python 2) or a function (Python 3).\n",
"    print('%-10s| %-16s| %-16s| %-16s' % ('Layer Name', 'Receptive Field', 'Effective Stride', 'Output Size'))\n",
"    print('-' * 59)\n",
"    print('%-10s| %-16s| %-16s| %-16s' % ('Input', '--', '--', str(input_size)))\n",
"    rf_s = find_receptive_field(structure)\n",
"    # Values reported for a network without layers: the input itself.\n",
"    rf, es, size = 1, 1, input_size\n",
"    for idx, layer in enumerate(structure):\n",
"        rf, es = rf_s[idx]\n",
"        # Carry the size forward one layer at a time rather than re-running\n",
"        # the whole prefix of the network for every row.\n",
"        size = output_size(**dict(layer, i=size))\n",
"        print('%-10s| %-16d| %-16d| %-16d' % (layer_names[idx], rf, es, size))\n",
"    return rf, es, size"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 Conv\n",
"Layer Name| Receptive Field | Effective Stride| Output Size \n",
"-----------------------------------------------------------\n",
"Input | -- | -- | 224 \n",
"conv1 | 3 | 1 | 222 \n"
]
}
],
"source": [
"# Each layer is a dict with these keys:\n",
"#   'k': kernel size (required)\n",
"#   's': stride (default 1)\n",
"#   'p': padding (default 0)\n",
"#   'd': dilation (default 1)\n",
"# A network is a flat list alternating layer dict and layer name.\n",
"\n",
"print('1 Conv')\n",
"network = [{'k': 3}, 'conv1']\n",
"find_full_info(network);"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 Conv + 1 Pool\n",
"Layer Name| Receptive Field | Effective Stride| Output Size \n",
"-----------------------------------------------------------\n",
"Input | -- | -- | 224 \n",
"conv1 | 3 | 1 | 222 \n",
"conv2 | 5 | 1 | 220 \n",
"pool1 | 6 | 2 | 110 \n"
]
}
],
"source": [
"print('2 Conv + 1 Pool')\n",
"network = [\n",
"    # two unpadded kernel-3 convolutions\n",
"    {'k': 3}, 'conv1',\n",
"    {'k': 3}, 'conv2',\n",
"    # kernel-2, stride-2 pooling\n",
"    {'k': 2, 's': 2}, 'pool1',\n",
"]\n",
"find_full_info(network);"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AlexNet\n",
"Layer Name| Receptive Field | Effective Stride| Output Size \n",
"-----------------------------------------------------------\n",
"Input | -- | -- | 227 \n",
"conv1 | 11 | 4 | 55 \n",
"pool1 | 19 | 8 | 27 \n",
"conv2 | 51 | 8 | 27 \n",
"pool2 | 67 | 16 | 13 \n",
"conv3 | 99 | 16 | 13 \n",
"conv4 | 131 | 16 | 13 \n",
"conv5 | 163 | 16 | 13 \n",
"pool5 | 195 | 32 | 6 \n",
"fc6 | 355 | 32 | 1 \n",
"fc7 | 355 | 32 | 1 \n"
]
}
],
"source": [
"print('AlexNet')\n",
"network = [\n",
"    {'k': 11, 's': 4, 'p': 0}, 'conv1',\n",
"    {'k': 3, 's': 2, 'p': 0}, 'pool1',\n",
"    {'k': 5, 's': 1, 'p': 2}, 'conv2',\n",
"    {'k': 3, 's': 2, 'p': 0}, 'pool2',\n",
"    {'k': 3, 's': 1, 'p': 1}, 'conv3',\n",
"    {'k': 3, 's': 1, 'p': 1}, 'conv4',\n",
"    {'k': 3, 's': 1, 'p': 1}, 'conv5',\n",
"    {'k': 3, 's': 2, 'p': 0}, 'pool5',\n",
"    # fully connected layers are entered as convolutions: kernel 6, then kernel 1\n",
"    {'k': 6}, 'fc6',\n",
"    {'k': 1}, 'fc7',\n",
"]\n",
"find_full_info(network, input_size=227);"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VGG-16\n",
"Layer Name| Receptive Field | Effective Stride| Output Size \n",
"-----------------------------------------------------------\n",
"Input | -- | -- | 224 \n",
"conv1_1 | 3 | 1 | 224 \n",
"conv1_2 | 5 | 1 | 224 \n",
"pool1 | 6 | 2 | 112 \n",
"conv2_1 | 10 | 2 | 112 \n",
"conv2_2 | 14 | 2 | 112 \n",
"pool2 | 16 | 4 | 56 \n",
"conv3_1 | 24 | 4 | 56 \n",
"conv3_2 | 32 | 4 | 56 \n",
"conv3_3 | 40 | 4 | 56 \n",
"pool3 | 44 | 8 | 28 \n",
"conv4_1 | 60 | 8 | 28 \n",
"conv4_2 | 76 | 8 | 28 \n",
"conv4_3 | 92 | 8 | 28 \n",
"pool4 | 100 | 16 | 14 \n",
"conv5_1 | 132 | 16 | 14 \n",
"conv5_2 | 164 | 16 | 14 \n",
"conv5_3 | 196 | 16 | 14 \n",
"pool5 | 212 | 32 | 7 \n",
"fc6 | 404 | 32 | 1 \n",
"fc7 | 404 | 32 | 1 \n",
"fc8 | 404 | 32 | 1 \n"
]
}
],
"source": [
"print('VGG-16')\n",
"network = [\n",
"    # stage 1\n",
"    {'k': 3, 'p': 1}, 'conv1_1',\n",
"    {'k': 3, 'p': 1}, 'conv1_2',\n",
"    {'k': 2, 's': 2}, 'pool1',\n",
"    # stage 2\n",
"    {'k': 3, 'p': 1}, 'conv2_1',\n",
"    {'k': 3, 'p': 1}, 'conv2_2',\n",
"    {'k': 2, 's': 2}, 'pool2',\n",
"    # stage 3\n",
"    {'k': 3, 'p': 1}, 'conv3_1',\n",
"    {'k': 3, 'p': 1}, 'conv3_2',\n",
"    {'k': 3, 'p': 1}, 'conv3_3',\n",
"    {'k': 2, 's': 2}, 'pool3',\n",
"    # stage 4\n",
"    {'k': 3, 'p': 1}, 'conv4_1',\n",
"    {'k': 3, 'p': 1}, 'conv4_2',\n",
"    {'k': 3, 'p': 1}, 'conv4_3',\n",
"    {'k': 2, 's': 2}, 'pool4',\n",
"    # stage 5\n",
"    {'k': 3, 'p': 1}, 'conv5_1',\n",
"    {'k': 3, 'p': 1}, 'conv5_2',\n",
"    {'k': 3, 'p': 1}, 'conv5_3',\n",
"    {'k': 2, 's': 2}, 'pool5',\n",
"    # fully connected layers entered as convolutions\n",
"    {'k': 7}, 'fc6',\n",
"    {'k': 1}, 'fc7',\n",
"    {'k': 1}, 'fc8',\n",
"]\n",
"find_full_info(network);"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VGG-16 with Dilated Convs for Dense Prediction\n",
"\n",
"Pascal VOC front end\n",
"Layer Name| Receptive Field | Effective Stride| Output Size \n",
"-----------------------------------------------------------\n",
"Input | -- | -- | 900 \n",
"conv1_1 | 3 | 1 | 898 \n",
"conv1_2 | 5 | 1 | 896 \n",
"pool1 | 6 | 2 | 448 \n",
"conv2_1 | 10 | 2 | 446 \n",
"conv2_2 | 14 | 2 | 444 \n",
"pool2 | 16 | 4 | 222 \n",
"conv3_1 | 24 | 4 | 220 \n",
"conv3_2 | 32 | 4 | 218 \n",
"conv3_3 | 40 | 4 | 216 \n",
"pool3 | 44 | 8 | 108 \n",
"conv4_1 | 60 | 8 | 106 \n",
"conv4_2 | 76 | 8 | 104 \n",
"conv4_3 | 92 | 8 | 102 \n",
"conv5_1 | 124 | 8 | 98 \n",
"conv5_2 | 156 | 8 | 94 \n",
"conv5_3 | 188 | 8 | 90 \n",
"fc6 | 380 | 8 | 66 \n",
"fc7 | 380 | 8 | 66 \n",
"fc-final | 380 | 8 | 66 \n",
"\n",
"Context Aggregation Module\n",
"Layer Name| Receptive Field | Effective Stride| Output Size \n",
"-----------------------------------------------------------\n",
"Input | -- | -- | 66 \n",
"ct_conv1_1| 3 | 1 | 130 \n",
"ct_conv1_2| 5 | 1 | 128 \n",
"ct_conv2_1| 9 | 1 | 124 \n",
"ct_conv3_1| 17 | 1 | 116 \n",
"ct_conv4_1| 33 | 1 | 100 \n",
"ct_conv5_1| 65 | 1 | 68 \n",
"ct_fc1 | 67 | 1 | 66 \n",
"ct_final | 67 | 1 | 66 \n"
]
}
],
"source": [
"# https://arxiv.org/pdf/1511.07122v3.pdf\n",
"# https://github.com/fyu/dilation/blob/master/models/dilation8_pascal_voc_deploy.prototxt\n",
"# No intermediate padding.\n",
"print('VGG-16 with Dilated Convs for Dense Prediction\\n')\n",
"print('Pascal VOC front end')\n",
"network = [\n",
"    {'k': 3}, 'conv1_1',\n",
"    {'k': 3}, 'conv1_2',\n",
"    {'k': 2, 's': 2}, 'pool1',\n",
"    {'k': 3}, 'conv2_1',\n",
"    {'k': 3}, 'conv2_2',\n",
"    {'k': 2, 's': 2}, 'pool2',\n",
"    {'k': 3}, 'conv3_1',\n",
"    {'k': 3}, 'conv3_2',\n",
"    {'k': 3}, 'conv3_3',\n",
"    {'k': 2, 's': 2}, 'pool3',\n",
"    {'k': 3}, 'conv4_1',\n",
"    {'k': 3}, 'conv4_2',\n",
"    {'k': 3}, 'conv4_3',\n",
"    # pool4 and pool5 are absent; dilation is used from conv5 onwards instead\n",
"    {'k': 3, 'd': 2}, 'conv5_1',\n",
"    {'k': 3, 'd': 2}, 'conv5_2',\n",
"    {'k': 3, 'd': 2}, 'conv5_3',\n",
"    {'k': 7, 'd': 4}, 'fc6',\n",
"    {'k': 1}, 'fc7',\n",
"    {'k': 1}, 'fc-final',\n",
"]\n",
"# Keep the front end's final output size (third element of the returned\n",
"# tuple) so the context module below is fed the real value, not a copy of it.\n",
"front_end_output = find_full_info(network, input_size=900)[2]\n",
"print('')\n",
"\n",
"print('Context Aggregation Module')\n",
"network = [\n",
"    {'k': 3, 'p': 33}, 'ct_conv1_1',\n",
"    {'k': 3}, 'ct_conv1_2',\n",
"    {'k': 3, 'd': 2}, 'ct_conv2_1',\n",
"    {'k': 3, 'd': 4}, 'ct_conv3_1',\n",
"    {'k': 3, 'd': 8}, 'ct_conv4_1',\n",
"    {'k': 3, 'd': 16}, 'ct_conv5_1',\n",
"    {'k': 3}, 'ct_fc1',\n",
"    {'k': 1}, 'ct_final',\n",
"]\n",
"find_full_info(network, input_size=front_end_output);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment