Skip to content

Instantly share code, notes, and snippets.

@DevStarSJ
Created April 13, 2018 02:21
Show Gist options
  • Save DevStarSJ/b21b23f6555c92f599f40f9fdf40b12a to your computer and use it in GitHub Desktop.
Save DevStarSJ/b21b23f6555c92f599f40f9fdf40b12a to your computer and use it in GitHub Desktop.
# pandas DataFrame iterate rows and assign to new column test
# - data size : 329 rows
# - unique of index one : 91
# - unique of index one : 329
# - iteration way
# - using index
# - make sub df one depth
# - make sub df two depth
# - using df index
# - groupby one level
# - groupby two level
# - groupby two level with minimize data read
# - reset index
# - make sub df one depth
# - make sub df two depth
# - using df index
# - groupby one level
# - groupby two level
# - groupby two level with minimize data read
# result
# - reset index - groupby one level is the fastest way.
# - Using groupby is faster than other ways.
# - If you use groupby, without index and lower level is good.
# - Never use DataFrame row index.
# using index - make sub df one depth : 0.7064568996429443
A = time.time()
for room_type_id in df2.index.get_level_values('room_type_id'):
df_id2 = df2.loc[df2.index.get_level_values('room_type_id') == room_type_id]
area_danji_id = df_id2.index.get_level_values('area_danji_id')[0]
sales_price = df_id2['sales_price'].astype(int).values[0]
low_limit = df_id2['low_limit'].astype(float).values[0]
df2.loc[df2.index.get_level_values('room_type_id') == room_type_id, 'test_value'] = sales_price * low_limit
print("Time: ", time.time()-A)
# using index - make sub df two depth : 0.7042222023010254
A = time.time()
for area_danji_id in np.unique(df2.index.get_level_values('area_danji_id')):
df_apt = df2.loc[df2.index.get_level_values('area_danji_id') == area_danji_id]
low_limit = df_apt['low_limit'].astype(float).values[0]
for room_type_id in df_apt.index.get_level_values('room_type_id'):
df_room = df_apt.loc[df_apt.index.get_level_values('room_type_id') == room_type_id]
sales_price = df_room['sales_price'].astype(int).values[0]
df2.loc[df2.index.get_level_values('room_type_id') == room_type_id, 'test_value'] = low_limit * sales_price
print("Time: ", time.time()-A)
# using index - using df index : 15.84641695022583
A = time.time()
for room_type_id in df2.index.get_level_values('room_type_id'):
idx_room = df2.index.get_level_values('room_type_id') == room_type_id
area_danji_id = df2[idx_room].index.get_level_values('area_danji_id')[0]
sales_price = df2[idx_room]['sales_price'].astype(int).values[0]
low_limit = df2[idx_room]['low_limit'].astype(float).values[0]
df2[idx_room]['test_value'] = sales_price * low_limit
print("Time: ", time.time()-A)
# using index - groupby one level : 0.23923683166503906
def do_room(df):
area_danji_id = df.index.get_level_values('area_danji_id')[0]
sales_price = df['sales_price'].astype(int).values[0]
low_limit = df['low_limit'].astype(float).values[0]
df['test_value'] = sales_price * low_limit
return df
A = time.time()
df3 = df2.groupby(df2.index.get_level_values('room_type_id')).apply(do_room)
print("Time: ", time.time()-A)
# using index - groupby two level : 0.5106461048126221
def do_apt(df):
return df.groupby(df.index.get_level_values('room_type_id')).apply(do_room)
def do_room(df):
area_danji_id = df.index.get_level_values('area_danji_id')[0]
low_limit = df['low_limit'].astype(float).values[0]
sales_price = df['sales_price'].astype(int).values[0]
df['test_value'] = sales_price * low_limit
return df
A = time.time()
df3 = df2.groupby(df2.index.get_level_values('area_danji_id')).apply(do_apt)
print("Time: ", time.time()-A)
# using index - groupby two level with minimize data read : 0.49247312545776367
def do_apt(df):
area_danji_id = df.index.get_level_values('area_danji_id')[0]
low_limit = df['low_limit'].astype(float).values[0]
do_it = curryr(do_room)(area_danji_id, low_limit)
return df.groupby(df.index.get_level_values('room_type_id')).apply(do_it)
def do_room(df, area_danji_id, low_limit):
sales_price = df['sales_price'].astype(int).values[0]
df['test_value'] = sales_price * low_limit
return df
A = time.time()
df3 = df2.groupby(df2.index.get_level_values('area_danji_id')).apply(do_apt)
print("Time: ", time.time()-A)
# reset index - make sub df two depth : 1.720639944076538
A = time.time()
for area_danji_id in df3['area_danji_id'].unique():
df_apt = df3.loc[df3['area_danji_id'] == area_danji_id]
low_limit = df_apt['low_limit'].astype(float).values[0]
for room_type_id in df_apt['room_type_id']:
df_room = df_apt.loc[df_apt['room_type_id'] == room_type_id]
sales_price = df_room['sales_price'].astype(int).values[0]
df3.loc[df3['room_type_id'] == room_type_id, 'test_value'] = low_limit * sales_price
print("Time: ", time.time()-A)
# reset index - using index : 100.720639944076538
A = time.time()
for room_type_id in df3['room_type_id']:
idx_room = df3['room_type_id'] == room_type_id
area_danji_id = df3[idx_room]['area_danji_id'].astype(int).values[0]
sales_price = df3[idx_room]['sales_price'].astype(int).values[0]
low_limit = df3[idx_room]['low_limit'].astype(float).values[0]
df3[idx_room]['test_value'] = sales_price * low_limit
print("Time: ", time.time()-A)
# reset index - make sub df one depth : 1.8659281730651855
A = time.time()
for room_type_id in df3['room_type_id']:
df_id2 = df3.loc[df3['room_type_id'] == room_type_id]
area_danji_id = df_id2['area_danji_id'].astype(int).values[0]
sales_price = df_id2['sales_price'].astype(int).values[0]
low_limit = df_id2['low_limit'].astype(float).values[0]
df3.loc[df3['room_type_id'] == room_type_id, 'test_value'] = sales_price * low_limit
print("Time: ", time.time()-A)
# reset index - groupby one level : 0.19803690910339355
def do_room(df):
area_danji_id = df['area_danji_id'].astype(int).values[0]
sales_price = df['sales_price'].astype(int).values[0]
low_limit = df['low_limit'].astype(float).values[0]
df['test_value'] = sales_price * low_limit
return df
A = time.time()
df5 = df4.groupby(df4['room_type_id']).apply(do_room)
print("Time: ", time.time()-A)
# reset index - groupby two level : 0.3681831359863281
def do_apt(df):
return df.groupby(df['room_type_id']).apply(do_room)
def do_room(df):
area_danji_id = df['area_danji_id'].astype(int).values[0]
low_limit = df['low_limit'].astype(float).values[0]
sales_price = df['sales_price'].astype(int).values[0]
df['test_value'] = sales_price * low_limit
return df
A = time.time()
df5 = df4.groupby(df4['area_danji_id']).apply(do_apt)
print("Time: ", time.time()-A)
# reset index - groupby two level : 0.2985241413116455
def do_apt(df):
area_danji_id = df['area_danji_id'].astype(int).values[0]
low_limit = df['low_limit'].astype(float).values[0]
do_it = curryr(do_room)(area_danji_id, low_limit)
return df.groupby(df['room_type_id']).apply(do_it)
def do_room(df, area_danji_id, low_limit):
sales_price = df['sales_price'].astype(int).values[0]
df['test_value'] = sales_price * low_limit
return df
A = time.time()
df5 = df4.groupby(df4['area_danji_id']).apply(do_apt)
print("Time: ", time.time()-A)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment