Skip to content

Instantly share code, notes, and snippets.

@dlovell
Created September 28, 2014 12:19
Show Gist options
  • Save dlovell/3a49b18b224d598ee9be to your computer and use it in GitHub Desktop.
Save dlovell/3a49b18b224d598ee9be to your computer and use it in GitHub Desktop.
Programmatic creation of vbench Benchmarks for use in pandas, testing large ngroups values
from vbench.api import Benchmark
from datetime import datetime
# Import line placed at the start of the benchmark setup source.
common_setup = "from pandas_vb_common import *\n"

# Setup source handed to every Benchmark created in this module.  It seeds
# NumPy's RNG with a fixed value and builds ``df``, a two-column DataFrame
# (``timestamp`` and ``value``) of random integers with ``ngroups * 10`` rows.
setup = common_setup + (
    "\n"
    "np.random.seed(1234)\n"
    "ngroups = 100\n"
    "size = ngroups * 10\n"
    "rng = np.arange(ngroups)\n"
    "df = DataFrame(dict(\n"
    "timestamp=rng.take(np.random.randint(0, ngroups, size=size)),\n"
    "value=np.random.randint(0, size, size=size)\n"
    "))\n"
)
# Names that are not benchmarked by this module.
# NOTE(review): presumably non-callable groupby attributes; this list is not
# referenced anywhere in the visible code -- confirm before removing.
other_list = ['groups', 'name', 'ngroups']
# Method names for which no benchmark is generated.
# NOTE(review): presumably these need arguments or are otherwise unsuitable
# for an argument-free call; the list is not referenced in the visible code.
dont_run_func_list = [
    'agg', 'apply', 'bfill', 'corr', 'cov', 'dtype', 'ffill',
    'fillna', 'filter', 'get_group', 'hist', 'idxmax', 'idxmin',
    'indices', 'irow', 'nlargest', 'nsmallest', 'nth', 'ohlc',
    'plot', 'quantile', 'resample', 'shift', 'take', 'transform',
    'tshift',
]
# Method names that are benchmarked with an empty argument list; the loop at
# the bottom of this module creates one Benchmark per entry, in this order.
no_arg_func_list = [
    'all', 'any', 'count', 'cumcount', 'cummax', 'cummin', 'cumprod',
    'cumsum', 'describe', 'diff', 'first', 'head', 'last', 'mad',
    'max', 'mean', 'median', 'min', 'nunique', 'pct_change', 'prod',
    'rank', 'sem', 'size', 'skew', 'std', 'sum', 'tail', 'unique',
    'var', 'value_counts',
]
_stmt_template = "df.groupby('value')['timestamp'].%s"
START_DATE = datetime(2011, 7, 1)
def add_Benchmark(func_name, func_args=''):
    """Create a Benchmark for one groupby method and register it globally.

    The timed statement is ``_stmt_template`` filled in with
    ``<func_name>(<func_args>)``.  The resulting Benchmark uses the
    module-level ``setup`` source and ``START_DATE``, and is stored in this
    module's globals under ``groupby_large_ngroups_<func_name>``.

    Parameters
    ----------
    func_name : str
        Name of the method to call on the grouped column.
    func_args : str, optional
        Source text placed between the call's parentheses (empty by default).

    Returns
    -------
    None
    """
    registered_name = 'groupby_large_ngroups_%s' % func_name
    call_expr = '%s(%s)' % (func_name, func_args)
    bench = Benchmark(_stmt_template % call_expr, setup, start_date=START_DATE)
    # The name MUST be assigned explicitly (original author's note).
    # NOTE(review): presumably vbench cannot infer a unique name for an
    # object created inside a helper function -- confirm.
    bench.name = registered_name
    # Publish the benchmark as a module-level variable named after itself.
    globals()[registered_name] = bench
# Register one benchmark for each method that can be called with no
# arguments; add_Benchmark's default func_args ('') yields an empty call.
for func_name in no_arg_func_list:
    add_Benchmark(func_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment