Skip to content

Instantly share code, notes, and snippets.

@Coldsp33d
Last active February 6, 2024 17:11
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Coldsp33d/ea080f580ab3a3b75c2f40c201d50164 to your computer and use it in GitHub Desktop.
Save Coldsp33d/ea080f580ab3a3b75c2f40c201d50164 to your computer and use it in GitHub Desktop.
Benchmarking different methods for creating an empty DataFrame and filling it row by row
import pandas as pd
import perfplot
def append(n: int) -> pd.DataFrame:
    """Build an ``n``-row frame by growing a DataFrame one row at a time.

    Each iteration rebinds ``df`` to the result of concatenating the frame
    built so far with a fresh one-row frame (the pattern the original code
    flags as "yuck").

    ``DataFrame.append`` was deprecated in pandas 1.4 and removed in 2.0, so
    the row is added with ``pd.concat``, its documented replacement.

    Args:
        n: Number of rows to add.

    Returns:
        A DataFrame with columns ``A``, ``B``, ``C`` and ``n`` rows.
    """
    df = pd.DataFrame(columns=['A', 'B', 'C'])
    for _ in range(n):
        # Build the one-row frame inside the loop so the per-row
        # construction cost stays part of what is being timed.
        row = pd.DataFrame([{'A': 1, 'B': 12.3, 'C': 'xyz'}])
        df = pd.concat([df, row], ignore_index=True)  # yuck
    return df
def list_append(n: int) -> pd.DataFrame:
    """Build an ``n``-row frame by collecting rows in a list first.

    Rows are accumulated in a plain Python list and the DataFrame is
    constructed once, after the loop.

    Args:
        n: Number of rows to add.

    Returns:
        A DataFrame with columns ``A``, ``B``, ``C`` and ``n`` rows.
    """
    data = []
    # The explicit append loop is the technique under test, so it is kept
    # as a loop rather than folded into a comprehension.
    for _ in range(n):
        data.append([1, 12.3, 'xyz'])
    return pd.DataFrame(data, columns=['A', 'B', 'C'])
def loc_append(n: int) -> pd.DataFrame:
    """Build an ``n``-row frame by assigning each new row through ``.loc``.

    The frame starts empty; every iteration assigns a row at the label
    ``len(df)``, which enlarges the frame in place by one row.

    Args:
        n: Number of rows to add.

    Returns:
        A DataFrame with columns ``A``, ``B``, ``C`` and ``n`` rows.
    """
    df = pd.DataFrame(columns=['A', 'B', 'C'])
    for _ in range(n):
        # len(df) is the next unused integer label because the frame starts
        # empty and gains exactly one row per iteration.
        df.loc[len(df)] = [1, 12.3, 'xyz']
    return df
# Functions to benchmark; each receives the row count n.
kernels = [append, list_append, loc_append]

perfplot.show(
    # The setup output is handed to every kernel; here it is n unchanged.
    setup=lambda n: n,
    kernels=kernels,
    labels=[fn.__name__ for fn in kernels],
    # NOTE(review): the range starts at 0 while logx=True -- confirm how the
    # zero point is handled on the logarithmic axis.
    n_range=list(range(0, 1000, 50)),
    xlabel='N',
    logx=True,
    logy=True,
    # Presumably turns off perfplot's comparison of kernel outputs -- confirm
    # against the perfplot documentation.
    equality_check=None,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment