Skip to content

Instantly share code, notes, and snippets.

@Erotemic
Last active October 18, 2018 14:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Erotemic/eb3be18a3324d129154b95cff539c72b to your computer and use it in GitHub Desktop.
Save Erotemic/eb3be18a3324d129154b95cff539c72b to your computer and use it in GitHub Desktop.
def benchmark_pandas(num_rows=1000, num_iters=100, bestof=10):
    """Compare row/column access speed of a pandas DataFrame vs. a plain dict.

    Builds a table of random floats stored two ways -- as a dict mapping
    column name to a numpy array, and as a ``pandas.DataFrame`` built from
    that dict -- then times several ways of touching every row and every
    column. Results are printed by ``timerit``; nothing is returned.

    The ``# Timed ...`` comments below are previously recorded results for
    the default arguments and are kept for reference only.

    Args:
        num_rows: Number of rows (length of each column array).
        num_iters: Number of timing iterations passed to ``timerit.Timerit``.
        bestof: ``bestof`` setting passed to ``timerit.Timerit``.
    """
    import timerit
    import pandas as pd
    import numpy as np

    # NOTE: the end of ``range`` is exclusive, so the columns are 'a'..'y'
    # (25 columns); 'z' is not included.
    column_data = {
        key: np.random.rand(num_rows)
        for key in map(chr, range(ord('a'), ord('z')))
    }
    data_frame = pd.DataFrame(column_data)

    def new_timer(label):
        # Every benchmark shares the same iteration settings.
        return timerit.Timerit(num_iters, bestof=bestof, label=label)

    # ---- Row access -------------------------------------------------------
    print('\n-----')

    # Label-based lookup; the frame has the default integer index, so the
    # labels coincide with the positions 0..num_rows-1.
    for timer in new_timer('Row Access (PANDAS.loc)'):
        with timer:
            for i in range(len(data_frame)):
                data_frame.loc[i]
    # Timed best=99.227 ms, mean=101.046 ± 1.6 ms for Row Access (PANDAS.loc)

    # Position-based lookup.
    for timer in new_timer('Row Access (PANDAS.iloc)'):
        with timer:
            for i in range(len(data_frame)):
                data_frame.iloc[i]
    # Timed best=81.863 ms, mean=83.144 ± 1.2 ms for Row Access (PANDAS.iloc)

    for timer in new_timer('Row Access (PANDAS.iterrows)'):
        with timer:
            for row in data_frame.iterrows():
                pass
    # Timed best=23.664 ms, mean=23.948 ± 0.3 ms for Row Access (PANDAS.iterrows)

    # Baseline: assemble each row as a dict straight from the column arrays.
    for timer in new_timer('Row Access (DICT)'):
        with timer:
            for i in range(len(data_frame)):
                {k: column_data[k][i] for k in column_data.keys()}
    # Timed best=4.151 ms, mean=4.271 ± 0.1 ms for Row Access (DICT)

    # ---- Column access ----------------------------------------------------
    print('\n-----')

    for timer in new_timer('Column Access (PANDAS)'):
        with timer:
            for k in data_frame.keys():
                data_frame[k]
    # Timed best=72.979 µs, mean=74.541 ± 1.0 µs for Column Access (PANDAS)

    for timer in new_timer('Column Access (DICT)'):
        with timer:
            for k in column_data.keys():
                column_data[k]
    # Timed best=2.548 µs, mean=2.577 ± 0.0 µs for Column Access (DICT)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment