Skip to content

Instantly share code, notes, and snippets.

@atelierhide
Last active August 29, 2015 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atelierhide/0ef9f7fdd24ba8424ecc to your computer and use it in GitHub Desktop.
Save atelierhide/0ef9f7fdd24ba8424ecc to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PyData.Tokyo Meetup #4 - Performance Tips"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hideki Tanaka (@atelierhide) 19/04/2015 \n",
"\n",
"CPython 2.7.9\n",
"IPython 3.1.0\n",
"\n",
"numpy 1.9.2\n",
"numba 0.18.2\n",
"cython 0.22\n"
]
}
],
"source": [
"%load_ext watermark\n",
"%watermark -a 'Hideki Tanaka (@atelierhide)' -v -p numpy,numba,cython -d"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"### Multiprocessing"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 loops, best of 3: 2.83 ms per loop\n"
]
}
],
"source": [
"from multiprocessing import Pool\n",
"import math\n",
"\n",
"pool = Pool(processes=4)\n",
"%timeit pool.map(math.sqrt, xrange(10000))\n",
"\n",
"pool.close()\n",
"pool.join()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"### Numpy, Numba, Cython"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"X = np.random.random((1000, 3))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def pairwise_python(X):\n",
" M = X.shape[0]\n",
" N = X.shape[1]\n",
" D = np.empty((M, M), dtype=np.float)\n",
" for i in range(M):\n",
" for j in range(M):\n",
" d = 0.0\n",
" for k in range(N):\n",
" tmp = X[i, k] - X[j, k]\n",
" d += tmp * tmp\n",
" D[i, j] = np.sqrt(d)\n",
" return D"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Numpy\n",
"def pairwise_numpy(X):\n",
" return np.sqrt(((X[:, None, :] - X) ** 2).sum(-1))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Numba\n",
"from numba.decorators import autojit\n",
"pairwise_numba = autojit(pairwise_python)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Cython\n",
"%load_ext Cython"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%cython\n",
"\n",
"import numpy as np\n",
"cimport cython\n",
"from libc.math cimport sqrt\n",
"\n",
"@cython.boundscheck(False)\n",
"@cython.wraparound(False)\n",
"def pairwise_cython(double[:, ::1] X):\n",
" cdef int M = X.shape[0]\n",
" cdef int N = X.shape[1]\n",
" cdef double tmp, d\n",
" cdef double[:, ::1] D = np.empty((M, M), dtype=np.float64)\n",
" for i in range(M):\n",
" for j in range(M):\n",
" d = 0.0\n",
" for k in range(N):\n",
" tmp = X[i, k] - X[j, k]\n",
" d += tmp * tmp\n",
" D[i, j] = sqrt(d)\n",
" return np.asarray(D)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loops, best of 3: 5.78 s per loop\n",
"10 loops, best of 3: 65 ms per loop\n",
"100 loops, best of 3: 10.6 ms per loop\n",
"100 loops, best of 3: 9.75 ms per loop\n"
]
}
],
"source": [
"%timeit pairwise_python(X)\n",
"%timeit pairwise_numpy(X)\n",
"%timeit pairwise_numba(X)\n",
"%timeit pairwise_cython(X)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"### List Comprehension, Map, Numpy"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import math\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def list_append(x):\n",
" results = []\n",
" for i in xrange(x):\n",
" results.append(math.sqrt(i))\n",
" return results\n",
"\n",
"def list_append2(x):\n",
" results = []\n",
" for i in xrange(x):\n",
" results.append(math.sqrt(i))\n",
" return results\n",
"\n",
"def list_comp(x):\n",
" results = [math.sqrt(i) for i in xrange(x)]\n",
" return results\n",
"\n",
"def list_map(x):\n",
" results = map(math.sqrt, xrange(x))\n",
" return results\n",
"\n",
"def list_numpy(x):\n",
" results = list(np.sqrt(np.arange(x)))\n",
" return results"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 loops, best of 3: 3.09 ms per loop\n",
"100 loops, best of 3: 3.09 ms per loop\n",
"1000 loops, best of 3: 1.86 ms per loop\n",
"1000 loops, best of 3: 1.17 ms per loop\n",
"1000 loops, best of 3: 736 µs per loop\n"
]
}
],
"source": [
"x = 10000\n",
"%timeit list_append(x)\n",
"%timeit list_append2(x)\n",
"%timeit list_comp(x)\n",
"%timeit list_map(x)\n",
"%timeit list_numpy(x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### References\n",
"\n",
"- [High Performance Python Computing for Data Science](http://www.slideshare.net/tkm2261/high-performance-python-computing-for-data-science)\n",
"- [Numba vs. Cython](https://jakevdp.github.io/blog/2013/06/15/numba-vs-cython-take-2/)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment