Skip to content

Instantly share code, notes, and snippets.

@seibert
Created August 7, 2015 14:50
Show Gist options
  • Save seibert/c3dfa376428f15d90d19 to your computer and use it in GitHub Desktop.
Save seibert/c3dfa376428f15d90d19 to your computer and use it in GitHub Desktop.
from pandas.computation import engines
from numba import vectorize
class HSAEngine(engines.AbstractEngine):
    """Evaluate a pandas ``eval`` expression through a Numba-compiled ufunc.

    The expression is rendered as the source of a one-line Python function
    whose parameters are the expression's string-named terms. That function
    is wrapped with ``numba.vectorize`` and later called with the values
    resolved from the expression's environment.

    NOTE(review): despite the class name, no ``target=`` is passed to
    ``vectorize``, so Numba's default target is used -- confirm whether an
    HSA target was intended.
    """

    # Inherited engine attribute, kept at the original value.
    # NOTE(review): semantics are defined by pandas' AbstractEngine -- confirm.
    has_neg_frac = False

    # Generated function source -> vectorized function. Class-level, so it is
    # shared by every engine instance created in this process.
    _func_cache = {}

    def __init__(self, expr):
        """Build (or fetch from the cache) the ufunc for ``expr``.

        Parameters
        ----------
        expr
            The parsed pandas expression. ``expr.names`` supplies the term
            names and ``str(expr)`` supplies the function body.
        """
        super(HSAEngine, self).__init__(expr)

        # Only string names become function parameters; ``expr.names`` can
        # also contain numeric constants (e.g. 2.0), which already appear
        # literally in ``str(expr)``.
        # Sorting makes the parameter order -- and therefore the generated
        # source used as the cache key -- independent of the iteration order
        # of ``expr.names`` (presumably an unordered set; verify in pandas).
        self._args = sorted(n for n in expr.names if isinstance(n, str))

        function_name = '__numba_pandas_eval_ufunc'
        function_str = 'def %s(%s):\n    return %s\n' % (
            function_name, ','.join(self._args), str(expr))

        ufunc = HSAEngine._func_cache.get(function_str)
        if ufunc is None:
            # SECURITY: the expression text is executed as Python source.
            # Only evaluate expressions that come from a trusted caller.
            scope = {}
            exec(function_str, scope)
            ufunc = vectorize(nopython=True)(scope[function_name])
            HSAEngine._func_cache[function_str] = ufunc
        self._ufunc = ufunc

    def _evaluate(self):
        """Resolve every argument name and apply the compiled ufunc.

        Returns
        -------
        The value returned by calling the ufunc on the resolved arguments,
        passed positionally in the same order as the generated parameters.
        """
        env = self.expr.env
        call_args = [env.resolve(name, False) for name in self._args]
        return self._ufunc(*call_args)


# Register the engine so callers can select it with ``engine='hsa'``.
engines._engines['hsa'] = HSAEngine
import engine_hsa # register new engine
import pandas as pd
# Small demo frame with two integer columns.
frame = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [2, 4, 6, 8]})
print('Input:', type(frame), '\n', frame)

# The same expression is evaluated twice on purpose; presumably the second
# call is meant to reuse the function the 'hsa' engine cached on the first.
for _ in range(2):
    total = frame.eval('x + y', engine='hsa')

print('Output:', type(total), '\n', total)
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import engine_hsa\n",
"import numpy as np\n",
"import pandas as pd\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n = 1000000\n",
"big_df = pd.DataFrame(\n",
" dict(x=np.random.uniform(size=n).astype(np.float32), \n",
" y=np.random.uniform(size=n).astype(np.float32)))\n",
"#big_df"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 22 ms per loop\n",
"100 loops, best of 3: 7.39 ms per loop\n",
"100 loops, best of 3: 8.74 ms per loop\n"
]
}
],
"source": [
"%timeit big_df.eval('(x**2 + y**2)**0.5', engine='python')\n",
"%timeit big_df.eval('(x**2 + y**2)**0.5', engine='numexpr')\n",
"%timeit big_df.eval('(x**2 + y**2)**0.5', engine='hsa')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"> \u001b[0;32m/work/continuum/clients/amd/pandas_hsa/engine_hsa.py\u001b[0m(19)\u001b[0;36m__init__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m 18 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m---> 19 \u001b[0;31m ''' % (function_name, ','.join(self._args), str(expr))\n",
"\u001b[0m\u001b[0;32m 20 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\n",
"ipdb> self._args\n",
"['x', 2.0, 'y']\n",
"ipdb> q\n"
]
}
],
"source": [
"%debug"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment