Skip to content

Instantly share code, notes, and snippets.

@yanmhlv
Created June 9, 2015 13:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yanmhlv/383437e1cf478e9ae922 to your computer and use it in GitHub Desktop.
Save yanmhlv/383437e1cf478e9ae922 to your computer and use it in GitHub Desktop.
import timeit
# Benchmark: compute the balance ("saldo" = accruals - payments) for every
# (account, foo) key in pure Python, grouping rows with itertools.groupby.
# The imports and the random test data live in `setup`, which timeit runs once
# and does not time, so the reported figure covers only grouping + subtraction.
time_python = timeit.timeit("""
# Bucket rows by (account, foo). groupby only merges *adjacent* rows with equal
# keys; that is sufficient here because the rows are generated in ascending
# order of a unique account number.
# Separate names are used so the setup lists are not rebound between iterations.
accruals_by_key = {
    key: list(rows)
    for key, rows in groupby(accruals, key=lambda x: (x['account'], x['foo']))
}
payments_by_key = {
    key: list(rows)
    for key, rows in groupby(payments, key=lambda x: (x['account'], x['foo']))
}

result = {}
# Keys that have accruals: accrued total minus whatever was paid on that key.
for accrual_key, values in accruals_by_key.items():
    result[accrual_key] = (
        sum(v['value'] for v in values)
        - sum(v['value'] for v in payments_by_key.get(accrual_key, []))
    )
# Keys that only have payments: the balance is the negated payment total.
for payment_key, values in payments_by_key.items():
    if payment_key in result:
        continue
    result[payment_key] = 0 - sum(v['value'] for v in values)
""", setup="""
from itertools import groupby
from random import randint

accruals = [
    {'account': i, 'value': i * randint(0, 20), 'foo': randint(0, 10)}
    for i in range(5000)
]
payments = [
    {'account': i, 'value': i * randint(0, 10), 'foo': randint(0, 10)}
    for i in range(5000)
]
""", number=100)
print('time_python', time_python)
# Benchmark: compute the same accruals - payments balance with pandas, starting
# from the same kind of list-of-dicts input and ending with a JSON string.
# The imports and the random test data live in `setup`, which timeit runs once
# and does not time; DataFrame construction stays in the timed statement
# because it is part of the pandas approach when the input is a list of dicts.
time_pandas = timeit.timeit("""
# Separate names are used so the setup lists are not rebound between iterations.
accruals_df = pd.DataFrame(accruals, columns=['account', 'value', 'foo']).set_index(['account', 'foo'])
payments_df = pd.DataFrame(payments, columns=['account', 'value', 'foo']).set_index(['account', 'foo'])

# Index-aligned subtraction; a key missing on one side is treated as 0.
df = accruals_df.sub(payments_df, fill_value=0)

# `orient` must be passed by keyword: the first positional argument of to_json
# is the output path, so to_json('records') would write a file named "records"
# on every iteration instead of returning record-oriented JSON.
# NOTE(review): the 'records' orient omits the (account, foo) index; call
# df.reset_index() first if the keys are needed in the output.
# Alternative serialisation considered by the author: df.to_dict('records').
df.to_json(orient='records')
""", setup="""
import pandas as pd
from random import randint

accruals = [
    {'account': i, 'value': i * randint(0, 20), 'foo': randint(0, 10)}
    for i in range(5000)
]
payments = [
    {'account': i, 'value': i * randint(0, 10), 'foo': randint(0, 10)}
    for i in range(5000)
]
""", number=100)
print('time_pandas', time_pandas)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment