Skip to content

Instantly share code, notes, and snippets.

@thangarajan8
Created September 15, 2021 10:19
Show Gist options
  • Save thangarajan8/f2358621beef40e945f825f8aad3b91e to your computer and use it in GitHub Desktop.
Save thangarajan8/f2358621beef40e945f825f8aad3b91e to your computer and use it in GitHub Desktop.
import pandas as pd
import time
import numpy as np
# Presumably the download location of the CSV read below (zip archive):
# http://eforexcel.com/wp/wp-content/uploads/2020/09/5m-Sales-Records.zip
# The file is opened via a relative path, so it must be present in the current
# working directory. The whole file is loaded into the module-level ``df``.
df = pd.read_csv("5m Sales Records.csv")
def filter1(df):
    """Time boolean-mask row selection with plain ``df[...]`` indexing.

    For every distinct value in ``df.Country`` the matching rows are selected
    with ``df[mask]``. The selections are thrown away; only the total elapsed
    time for the whole loop is reported.

    Args:
        df: DataFrame exposing a ``Country`` column.

    Returns:
        None. The elapsed time in seconds is printed to stdout.
    """
    # perf_counter() is monotonic and high-resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    for country in df.Country.unique():
        # Result is intentionally discarded: only the cost of filtering matters.
        _ = df[df.Country == country]
    print(f"Total time {time.perf_counter()-start_time}")
#90 sec
def filter2(df):
    """Time boolean-mask row selection through the ``.loc`` indexer.

    For every distinct value in ``df.Country`` the matching rows are selected
    with ``df.loc[mask]``. The selections are thrown away; only the total
    elapsed time for the whole loop is reported.

    Args:
        df: DataFrame exposing a ``Country`` column.

    Returns:
        None. The elapsed time in seconds is printed to stdout.
    """
    # perf_counter() is monotonic and high-resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    for country in df.Country.unique():
        # Result is intentionally discarded: only the cost of filtering matters.
        _ = df.loc[df.Country == country]
    print(f"Total time {time.perf_counter()-start_time}")
#81.4 Sec
def filter_numpy(df):
    """Time row selection using a NumPy-built membership mask with ``.loc``.

    For every distinct value in ``df.Country`` a boolean mask is computed by
    NumPy and used to select rows via ``df.loc[mask]``. The selections are
    thrown away; only the total elapsed time for the whole loop is reported.

    Args:
        df: DataFrame exposing a ``Country`` column.

    Returns:
        None. The elapsed time in seconds is printed to stdout.
    """
    # perf_counter() is monotonic and high-resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    for country in df.Country.unique():
        # np.isin replaces the deprecated np.in1d; for a one-dimensional
        # column it yields the same boolean mask.
        _ = df.loc[np.isin(df.Country, country)]
    print(f"Total time {time.perf_counter()-start_time}")
# Run the three benchmarks in sequence against the module-level DataFrame;
# each call prints one "Total time ..." line.
filter1(df)
filter2(df)
filter_numpy(df)
# Presumably the output recorded from one run, in call order (seconds):
#Total time 92.1524384021759
#Total time 97.74113202095032
#Total time 43.44165015220642
# Winner is filter_numpy (lowest of the three times listed above).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment