Skip to content

Instantly share code, notes, and snippets.

@michaeldorner
Last active May 1, 2018 18:31

Revisions

  1. michaeldorner revised this gist May 1, 2018. 1 changed file with 13 additions and 3 deletions.
    16 changes: 13 additions & 3 deletions benchmark_pandas_concat_append.py
    Original file line number Diff line number Diff line change
    @@ -8,9 +8,19 @@

    def benchmark_statements(ignore_index):
        """Return the ``(label, statement)`` pairs timed for one index mode.

        Every statement joins a frame with itself, once through
        ``DataFrame.append`` and once through ``pd.concat``, so both rows of a
        given size measure the same amount of work. The names ``pd``,
        ``df_small``, ``df_medium`` and ``df_large`` are only spelled out here;
        they have to be provided by the timeit setup code.

        ignore_index: when true, ``ignore_index = True`` is passed to both
            operations and the concat label keeps its ``ignore_index=True``
            suffix; when false, neither call receives the argument and the
            concat label is plain ``concat``.
        """
        option = ', ignore_index = True' if ignore_index else ''
        concat_name = 'concat ignore_index=True' if ignore_index else 'concat'
        variants = (
            ('append', 'df = df_{0}.append(df_{0}{1})'),
            (concat_name, 'df = pd.concat([df_{0}, df_{0}]{1})'),
        )
        pairs = []
        for name, template in variants:
            for size in ('small', 'medium', 'large'):
                label = '{0}, {1}'.format(size, name)
                pairs.append((label, template.format(size, option)))
        return pairs


    if __name__ == '__main__':
        import timeit

        # NOTE: the append rows need a pandas release that still provides
        # DataFrame.append (the method was removed in pandas 2.0).
        for ignore_index in (False, True):
            print('ignore_index = {0}'.format(ignore_index))
            for label, stmt in benchmark_statements(ignore_index):
                # `setup` is the module-level timeit setup string that builds
                # the three frames.
                print(label, timeit.timeit(stmt, setup=setup, number=1000))
  2. michaeldorner revised this gist Jan 9, 2018. No changes.
  3. michaeldorner revised this gist Jan 9, 2018. No changes.
  4. michaeldorner revised this gist Jan 9, 2018. No changes.
  5. michaeldorner created this gist Jan 9, 2018.
    16 changes: 16 additions & 0 deletions benchmark_pandas_concat_append.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,16 @@
    # Code timeit runs once before each timed loop: imports pandas/numpy and
    # builds three frames of random data (10x1, 100x10 and 1000x100) that the
    # benchmark statements refer to by name.
    setup = '''
    import pandas as pd
    import numpy as np
    df_small = pd.DataFrame(np.random.randn(10, 1), columns=[str(1)])
    df_medium = pd.DataFrame(np.random.randn(100, 10), columns=[str(i) for i in range(0, 10)])
    df_large = pd.DataFrame(np.random.randn(1000, 100), columns=[str(i) for i in range(0, 100)])
    '''

    # (label, statement) pairs, timed in this order. Both operations join a
    # frame with itself and leave index handling at its default, so the append
    # and concat rows of one size measure the same work. Concatenating a
    # one-element list with ignore_index=True would not be comparable.
    BENCHMARKS = [
        ('small, append', 'df = df_small.append(df_small)'),
        ('medium, append', 'df = df_medium.append(df_medium)'),
        ('large, append', 'df = df_large.append(df_large)'),
        ('small, concat', 'df = pd.concat([df_small, df_small])'),
        ('medium, concat', 'df = pd.concat([df_medium, df_medium])'),
        ('large, concat', 'df = pd.concat([df_large, df_large])'),
    ]

    if __name__ == '__main__':
        import timeit

        # NOTE: the append rows need a pandas release that still provides
        # DataFrame.append (the method was removed in pandas 2.0).
        for label, stmt in BENCHMARKS:
            print(label, timeit.timeit(stmt, setup=setup, number=1000))