Skip to content

Instantly share code, notes, and snippets.

@RobMulla
Created April 2, 2022 02:45
Show Gist options
  • Save RobMulla/f04b144bb766b692f9314e3782d724d3 to your computer and use it in GitHub Desktop.
Save RobMulla/f04b144bb766b692f9314e3782d724d3 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
def get_dataset(size, seed=None):
    """Build a random benchmark DataFrame with ``size`` rows.

    Columns, in order:
        position: one of 'left', 'middle', 'right'
        age:      random integer in [1, 50)
        team:     one of 'red', 'blue', 'yellow', 'green'
        win:      'yes' or 'no'
        prob:     random float in [0, 1)

    Args:
        size: Number of rows to generate.
        seed: Optional seed. When given, the data is drawn from a private
            ``np.random.RandomState(seed)`` so repeated calls return the
            same frame; when ``None`` (the default) the global ``np.random``
            state is used.

    Returns:
        A new ``pandas.DataFrame`` with the five columns above.
    """
    # Either source exposes the same choice/randint/uniform API; a private
    # RandomState keeps seeded calls from disturbing the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)
    # Dict values are evaluated top to bottom, so the draws happen in
    # column order.
    return pd.DataFrame({
        'position': rng.choice(['left', 'middle', 'right'], size),
        'age': rng.randint(1, 50, size),
        'team': rng.choice(['red', 'blue', 'yellow', 'green'], size),
        'win': rng.choice(['yes', 'no'], size),
        'prob': rng.uniform(0, 1, size),
    })
def set_dtypes(df):
    """Convert the benchmark columns of ``df`` to compact dtypes, in place.

    Conversions applied:
        position, team -> 'category'
        age            -> 'int8'
        prob           -> 'float32'
        win            -> booleans ('yes' -> True, 'no' -> False)

    The function is safe to call more than once on the same frame: an
    already-boolean ``win`` column is left untouched.

    Args:
        df: DataFrame with the columns ``position``, ``team``, ``age``,
            ``prob`` and ``win``. It is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    df['position'] = df['position'].astype('category')
    df['team'] = df['team'].astype('category')
    # NOTE: int8 only holds -128..127; this assumes ages stay in that range.
    df['age'] = df['age'].astype('int8')
    df['prob'] = df['prob'].astype('float32')
    # Mapping an already-boolean column through the yes/no dict would find
    # no matching keys and turn every value into NaN, so convert only while
    # the column still holds the string labels.
    if not pd.api.types.is_bool_dtype(df['win']):
        df['win'] = df['win'].map({'yes': True, 'no': False})
    return df
# Benchmark script; the `%timeit` lines are IPython magics, so this section
# must be run in IPython/Jupyter rather than with plain `python`.

# Baseline: one million rows with the dtypes get_dataset() produces.
df = get_dataset(1_000_000)
# Rank `age` within each (team, position) group.
%timeit df['age_rank'] = df.groupby(['team','position'])['age'].rank()
# Rank `prob` within each (team, position) group.
%timeit df['prob_rank'] = df.groupby(['team','position'])['prob'].rank()
# Rank `prob` within each (team, position, win) group.
%timeit df['win_prob_rank'] = df.groupby(['team','position','win'])['prob'].rank()
# Comparison: a fresh dataset of the same size, converted by set_dtypes()
# before the same three statements are timed again.
df = get_dataset(1_000_000)
df = set_dtypes(df)
%timeit df['age_rank'] = df.groupby(['team','position'])['age'].rank()
%timeit df['prob_rank'] = df.groupby(['team','position'])['prob'].rank()
%timeit df['win_prob_rank'] = df.groupby(['team','position','win'])['prob'].rank()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment