khanrc/NP-iteration.ipynb

## NP-iteration.ipynb
{
 "cells": [
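  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NumPy Iteration test\n",
    "\n",
    "Performance comparison of different ways to iterate over a NumPy array. Every benchmark counts the rows `(n, m)` of an `(N, 2)` integer array that satisfy `n < m*2.5`."
   ]
  },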
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.7.6 (default, Oct 26 2016, 20:30:19) \n",
      "[GCC 4.8.4]\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "print sys.version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import random\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "N = 100000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Seed the generator so every run benchmarks the same data.\n",
    "np.random.seed(0)\n",
    "np_db = np.random.randint(1, 100+1, [N, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100000, 2)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np_db.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "list_db = np_db.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for i in range(N):\n",
    "    if np_db[i, 0] != list_db[i][0] or np_db[i, 1] != list_db[i][1]:\n",
    "        print \"Error\"\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loop, best of 3: 319 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "c = 0\n",
    "for n, m in np_db:\n",
    "    if n < m*2.5:\n",
    "        c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loop, best of 3: 269 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "c = 0\n",
    "for i in range(np_db.shape[0]):\n",
    "    if np_db[i, 0] < np_db[i, 1]*2.5:\n",
    "        c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 loops, best of 3: 17.1 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "c = 0\n",
    "for n, m in np_db.tolist():\n",
    "    if n < m*2.5:\n",
    "        c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 loops, best of 3: 8.07 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "c = 0\n",
    "for n, m in list_db:\n",
    "    if n < m*2.5:\n",
    "        c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000 loops, best of 3: 309 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "np.sum(np_db[:, 0] < np_db[:, 1]*2.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loop, best of 3: 270 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "len(filter(lambda tu: tu[0] < tu[1]*2.5, np_db))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 loops, best of 3: 11.2 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "len(filter(lambda tu: tu[0] < tu[1]*2.5, list_db))"
   ]
  },
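  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Conclusion\n",
    "\n",
    "* Prefer vectorized NumPy operations: `np.sum(np_db[:, 0] < np_db[:, 1]*2.5)` took about 0.3 ms, versus 8 ms or more for any Python-level loop\n",
    "* When vectorization is not possible, convert the array with `tolist()` and iterate over the resulting list (8-17 ms) instead of iterating over the NumPy array directly (about 270-320 ms)\n",
    "* Performance of `filter` is similar to an explicit `for` loop over the same container (270 ms vs. 269-319 ms on the array, 11.2 ms vs. 8.07 ms on the list)"
   ]
  }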
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
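	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# NumPy Iteration test\n",
	"\n",
	"Performance comparison of different ways to iterate over a NumPy array. Every benchmark counts the rows `(n, m)` of an `(N, 2)` integer array that satisfy `n < m*2.5`."
	]
	},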
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2.7.6 (default, Oct 26 2016, 20:30:19) \n",
	"[GCC 4.8.4]\n"
	]
	}
	],
	"source": [
	"import sys\n",
	"print sys.version"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import random\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"N = 100000"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Seed the generator so every run benchmarks the same data.\n",
	"np.random.seed(0)\n",
	"np_db = np.random.randint(1, 100+1, [N, 2])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(100000, 2)"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np_db.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"list_db = np_db.tolist()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Sanity check: the list copy must match the array element for element.\n",
	"for i in range(N):\n",
	"    if np_db[i, 0] != list_db[i][0] or np_db[i, 1] != list_db[i][1]:\n",
	"        print \"Error\"\n",
	"        break"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1 loop, best of 3: 319 ms per loop\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Iterate over the ndarray directly, unpacking each row.\n",
	"c = 0\n",
	"for n, m in np_db:\n",
	"    if n < m*2.5:\n",
	"        c += 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1 loop, best of 3: 269 ms per loop\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Walk the ndarray by row index and read both columns with [i, j] indexing.\n",
	"c = 0\n",
	"for i in range(np_db.shape[0]):\n",
	"    if np_db[i, 0] < np_db[i, 1]*2.5:\n",
	"        c += 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"100 loops, best of 3: 17.1 ms per loop\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Convert to a list inside the timed region, then iterate over that list.\n",
	"c = 0\n",
	"for n, m in np_db.tolist():\n",
	"    if n < m*2.5:\n",
	"        c += 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"100 loops, best of 3: 8.07 ms per loop\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Iterate over the list that was converted ahead of time (conversion not timed).\n",
	"c = 0\n",
	"for n, m in list_db:\n",
	"    if n < m*2.5:\n",
	"        c += 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1000 loops, best of 3: 309 µs per loop\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"np.sum(np_db[:, 0] < np_db[:, 1]*2.5)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1 loop, best of 3: 270 ms per loop\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"len(filter(lambda tu: tu[0] < tu[1]*2.5, np_db))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"100 loops, best of 3: 11.2 ms per loop\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"len(filter(lambda tu: tu[0] < tu[1]*2.5, list_db))"
	]
	},
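	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Conclusion\n",
	"\n",
	"* Prefer vectorized NumPy operations: `np.sum(np_db[:, 0] < np_db[:, 1]*2.5)` took about 0.3 ms, versus 8 ms or more for any Python-level loop\n",
	"* When vectorization is not possible, convert the array with `tolist()` and iterate over the resulting list (8-17 ms) instead of iterating over the NumPy array directly (about 270-320 ms)\n",
	"* Performance of `filter` is similar to an explicit `for` loop over the same container (270 ms vs. 269-319 ms on the array, 11.2 ms vs. 8.07 ms on the list)"
	]
	}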
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}