# dlovell/gropuby.py

## gropuby.py
from vbench.api import Benchmark
from datetime import datetime

# Import line placed at the start of the benchmark setup code.
common_setup = "from pandas_vb_common import *\n"


# Setup source passed to every Benchmark created below.  It seeds NumPy's
# RNG with 1234 and builds `df` with two columns of `size` entries each:
# `timestamp` (values taken from `rng`) and `value` (random integers in
# [0, size)).  `np` and `DataFrame` are presumably supplied by the star
# import above -- confirm against pandas_vb_common.
setup = common_setup + (
    "\n"
    "np.random.seed(1234)\n"
    "ngroups = 100\n"
    "size = ngroups * 10\n"
    "rng = np.arange(ngroups)\n"
    "df = DataFrame(dict(\n"
    "    timestamp=rng.take(np.random.randint(0, ngroups, size=size)),\n"
    "    value=np.random.randint(0, size, size=size)\n"
    "))\n"
)


# Names kept for reference only: no code visible in this file reads this
# list (presumably non-callable groupby attributes -- confirm).
other_list = ["groups", "name", "ngroups"]

# Names for which no benchmark is generated: nothing visible in this file
# iterates over this list (presumably because these need arguments or do
# not fit the zero-argument statement template -- confirm).
dont_run_func_list = [
    "agg", "apply", "bfill", "corr", "cov", "dtype", "ffill", "fillna",
    "filter", "get_group", "hist", "idxmax", "idxmin", "indices", "irow",
    "nlargest", "nsmallest", "nth", "ohlc", "plot", "quantile", "resample",
    "shift", "take", "transform", "tshift",
]

# Methods benchmarked with an empty argument list; the loop at the end of
# the module registers one Benchmark per entry, in this order.
no_arg_func_list = [
    "all", "any", "count", "cumcount", "cummax", "cummin", "cumprod",
    "cumsum", "describe", "diff", "first", "head", "last", "mad", "max",
    "mean", "median", "min", "nunique", "pct_change", "prod", "rank", "sem",
    "size", "skew", "std", "sum", "tail", "unique", "var", "value_counts",
]


_stmt_template = "df.groupby('value')['timestamp'].%s"
START_DATE = datetime(2011, 7, 1)
def add_Benchmark(func_name, func_args=''):
    """Build a Benchmark for one groupby method and publish it as a global.

    The timed statement is ``_stmt_template`` filled in with
    ``<func_name>(<func_args>)``.  The resulting Benchmark is stored in this
    module's globals under ``groupby_large_ngroups_<func_name>``.

    func_name -- name of the method to call on the grouped column.
    func_args -- source text placed between the call parentheses
                 (empty by default, i.e. a zero-argument call).
    """
    key = 'groupby_large_ngroups_%s' % func_name
    call_src = '%s(%s)' % (func_name, func_args)
    bench = Benchmark(_stmt_template % (call_src,), setup,
                      start_date=START_DATE)
    # The name is not given to the constructor, so it MUST be set here.
    bench.name = key
    globals()[key] = bench

# Register one benchmark per zero-argument groupby method; each call stores
# a Benchmark in the module globals as groupby_large_ngroups_<method>.
for func_name in no_arg_func_list:
    add_Benchmark(func_name)