Skip to content

Instantly share code, notes, and snippets.

@shoyer
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shoyer/458910551327d3c88f9c to your computer and use it in GitHub Desktop.
Save shoyer/458910551327d3c88f9c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:c151ec31c3cb26aaf81320c5cd2c7d3e1668901a7d40ecc3e1497d98327fab2c"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from numba import vectorize, guvectorize, jit\n",
"import numba\n",
"import numpy as np\n",
"import bottleneck"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 179
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"nansum"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"@jit\n",
"def nansum(x):\n",
" result = 0.0\n",
" for xi in x.flat:\n",
" if not np.isnan(xi):\n",
" result += xi\n",
" return result\n",
"\n",
"\n",
"@vectorize(['float32(float32, float32)',\n",
" 'float64(float64, float64)'])\n",
"def nanadd(a, b):\n",
" anull = np.isnan(a)\n",
" bnull = np.isnan(b)\n",
" if anull and bnull:\n",
" return 0.0\n",
" elif anull:\n",
" return b\n",
" elif bnull:\n",
" return a\n",
" else:\n",
" return a + b"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 326
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = np.random.RandomState(0).randn(1000000.0).reshape(1000, 1000)\n",
"x[x < -1] = np.nan"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 327
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit bottleneck.nansum(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 2.19 ms per loop\n"
]
}
],
"prompt_number": 328
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit nansum(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 2.64 ms per loop\n"
]
}
],
"prompt_number": 188
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit nanadd.reduce(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 4.74 ms per loop\n"
]
}
],
"prompt_number": 157
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit np.nansum(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 5.42 ms per loop\n"
]
}
],
"prompt_number": 158
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"reducing nansum"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"@guvectorize(['void(float64[:,:], float64[:])'],\n",
" '(n, m)->(n)')\n",
"def nansum_last_axis(x, result):\n",
" for i in range(x.shape[0]):\n",
" result[i] = 0.0\n",
" for j in range(x.shape[1]):\n",
" xij = x[i, j]\n",
" if not np.isnan(xij):\n",
" result[i] += xij\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 394
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"X = np.random.RandomState(0).randn(1000000.0).reshape(1000, 1000)\n",
"X[X < -1] = np.nan"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 370
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit np.nansum(X, axis=-1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 5.51 ms per loop\n"
]
}
],
"prompt_number": 332
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit nansum_last_axis(X)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 2.97 ms per loop\n"
]
}
],
"prompt_number": 411
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit bottleneck.nansum(X, axis=-1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 2.24 ms per loop\n"
]
}
],
"prompt_number": 372
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# I wanted to write something like this, but the result (depending on how I\n",
"# write it) is either not accelerated or broken:\n",
"\n",
"# can't handle arguments like float64 without brackets\n",
"@guvectorize(['void(float64[:], float64[:])'],\n",
" '(x)->()')\n",
"def nansum_last_axis_buggy(x, result):\n",
" result = 0.0\n",
" for i in range(x.shape[0]):\n",
" xi = x[i]\n",
" if not np.isnan(xi):\n",
" result += xi\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 409
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"nansum_last_axis_buggy(X)[:10]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 412,
"text": [
"array([ -1.49166815e-154, -1.49166815e-154, 2.12357514e-314,\n",
" 2.12357513e-314, 2.12357512e-314, 2.12357511e-314,\n",
" 2.12357509e-314, 2.12357508e-314, 2.12357507e-314,\n",
" 2.12357506e-314])"
]
}
],
"prompt_number": 412
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"count"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: count is not in bottleneck, but I decided to benchmark it anwyays"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"@jit\n",
"def count(x):\n",
" result = 0\n",
" for i in range(len(x)):\n",
" if not np.isnan(x[i]):\n",
" result += 1\n",
" return result\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 201
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit count(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000 loops, best of 3: 1.06 ms per loop\n"
]
}
],
"prompt_number": 202
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# note: this is basically the implementation in pandas\n",
"%timeit (~np.isnan(x)).sum()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 2.23 ms per loop\n"
]
}
],
"prompt_number": 203
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment