Skip to content

Instantly share code, notes, and snippets.

@Coldsp33d
Last active November 24, 2022 17:22
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Coldsp33d/fa2101cded4699786f91f1a0215df8e5 to your computer and use it in GitHub Desktop.
Save Coldsp33d/fa2101cded4699786f91f1a0215df8e5 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import perfplot
from string import ascii_lowercase as LOWER, ascii_uppercase as UPPER
import random
# Note: copy() calls are only needed for kernels that use `pop()`, because `pop()`
# mutates the DataFrame in place and the input must not change across benchmark runs.
# The kernels below use join()/drop(), which return new DataFrames, so no copy() is required.
def apply_drop(df: pd.DataFrame) -> pd.DataFrame:
    """Expand the dict column ``'val'`` into columns using ``Series.apply``.

    Every dictionary in ``df['val']`` is converted with ``pd.Series``, the
    resulting frame is joined back onto ``df`` by index, and the original
    ``'val'`` column is dropped.

    Args:
        df: DataFrame with a ``'val'`` column holding dictionaries.

    Returns:
        A new DataFrame with one column per dictionary key in place of
        ``'val'``; ``df`` itself is not modified.
    """
    # apply() keeps the index of df['val'], so the join below lines rows up.
    expanded = df['val'].apply(pd.Series)
    return df.join(expanded).drop('val', axis=1)
def json_normalise_drop(df: pd.DataFrame) -> pd.DataFrame:
    """Expand the dict column ``'val'`` into columns using ``pd.json_normalize``.

    The dictionaries in ``df['val']`` are flattened by ``pd.json_normalize``,
    the result is joined back onto ``df`` by index, and the original
    ``'val'`` column is dropped.

    Args:
        df: DataFrame with a ``'val'`` column holding dictionaries.

    Returns:
        A new DataFrame with the flattened keys as columns in place of
        ``'val'``; ``df`` itself is not modified.
    """
    expanded = pd.json_normalize(df['val'].tolist())
    # Given a plain list, json_normalize labels rows 0..n-1. Re-label with the
    # caller's index so the index-based join pairs each row with its own
    # record even when df does not have a default RangeIndex.
    expanded.index = df.index
    return df.join(expanded).drop('val', axis=1)
def tolist_drop(df: pd.DataFrame) -> pd.DataFrame:
    """Expand the dict column ``'val'`` into columns via a list of dicts.

    ``df['val']`` is turned into a plain list of dictionaries and handed to
    the ``pd.DataFrame`` constructor; the result is joined back onto ``df``
    by index and the original ``'val'`` column is dropped.

    Args:
        df: DataFrame with a ``'val'`` column holding dictionaries.

    Returns:
        A new DataFrame with one column per dictionary key in place of
        ``'val'``; ``df`` itself is not modified.
    """
    # Build the expanded frame on df's own index: without it the constructor
    # assigns 0..n-1, and the index-based join would misalign (yielding NaN)
    # whenever df does not have a default RangeIndex.
    expanded = pd.DataFrame(df['val'].tolist(), index=df.index)
    return df.join(expanded).drop('val', axis=1)
# Seed the RNG so every run benchmarks the same sample data.
random.seed(0)

# Every other character of a-z followed by every other character of A-Z.
letters = LOWER[::2] + UPPER[::2]

# M keys per dictionary, N rows in the base frame.
M, N = 10, 10

# Base frame: an integer 'idx' column plus a 'val' column of dictionaries.
# Each dictionary maps M randomly sampled letters to their 1-based position
# within that sample.
df = pd.DataFrame({
    'idx': np.arange(N),
    'val': [
        {letter: rank for rank, letter in enumerate(random.sample(letters, M), 1)}
        for _ in range(N)
    ],
})

# Functions under test.
kernels = [apply_drop, json_normalise_drop, tolist_drop]

perfplot.show(
    # Input for size n is the base frame repeated n times with a fresh index.
    setup=lambda n: pd.concat([df] * n, ignore_index=True),
    kernels=kernels,
    labels=[kernel.__name__ for kernel in kernels],
    n_range=[1 << exponent for exponent in range(12)],
    xlabel='N',
    logx=True,
    logy=True,
    # Kernel outputs are compared with DataFrame.equals.
    equality_check=lambda expected, actual: expected.equals(actual),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment