Skip to content

Instantly share code, notes, and snippets.

@joonro
Last active June 8, 2023 14:23
Show Gist options
  • Save joonro/0247661b0baf8ba380d5b5c54807eda7 to your computer and use it in GitHub Desktop.
Save joonro/0247661b0baf8ba380d5b5c54807eda7 to your computer and use it in GitHub Desktop.
[General Multiprocessing of pandas.DataFrame with multiple arguments] Using multiple arguments, splitting pandas.DataFrame #python #multiprocessing #pandas
import numpy as np
import pandas as pd
def cal_d(partial_panel, nvid_jt, movchr, r_all):
    """Fill a slice of the panel with release-lag and rating-gap columns.

    For every row of ``partial_panel`` the function reads the row's
    ``showdate`` and ``nvid``, looks up the matching reference values in
    ``movchr``, ``nvid_jt`` and ``r_all``, and writes the results back into
    ``partial_panel`` with ``.at``.

    Parameters
    ----------
    partial_panel
        DataFrame with at least the columns ``showdate`` and ``nvid``.
        It is modified in place (new columns are added) and also returned.
    nvid_jt
        Object indexed by show date through ``.loc``. The first position at
        which the selected row equals ``nvid`` is used as ``j``.
    movchr
        DataFrame indexed by ``nvid`` with the columns ``reldate``, ``R*``,
        ``R*_dm`` and ``R*_nv``.
    r_all
        Object queried as ``r_all.loc[showdate, j, source, ['num_r', 'R']]``
        with ``source`` being ``'Daum'`` or ``'Naver'``; each query must
        yield exactly two values, in the order (``num_r``, ``R``).
        NOTE(review): presumably a MultiIndex-ed Series/frame -- confirm
        against the caller.

    Returns
    -------
    The same ``partial_panel`` object, with these columns written per row:
    ``days_since_rel``, ``R_dm``, ``num_r_dm``, ``R_nv``, ``num_r_nv``,
    ``d_dc``, ``d_dd``, ``d_nc``, ``d_nn``, ``R*``, ``R*_dm``, ``R*_nv``.

    Raises
    ------
    IndexError
        If ``nvid`` does not occur in ``nvid_jt.loc[showdate]``.
    KeyError
        If a label is missing from ``movchr``, ``nvid_jt`` or ``r_all``.
    """
    for row in partial_panel.itertuples():
        showdate = pd.Timestamp(str(row.showdate))
        nvid = row.nvid

        reldate = movchr.at[nvid, 'reldate']
        R_star = movchr.at[nvid, 'R*']
        R_star_dm = movchr.at[nvid, 'R*_dm']
        R_star_nv = movchr.at[nvid, 'R*_nv']

        # Position of this nvid within the show date's row of nvid_jt;
        # the trailing [0][0] raises IndexError when there is no match.
        j = np.where(nvid_jt.loc[showdate] == nvid)[0][0]

        # Only the per-site values feed the output columns, so the
        # 'Combined' entry of r_all is deliberately not looked up.
        num_r_dm, R_dm = r_all.loc[showdate, j, 'Daum', ['num_r', 'R']].values
        num_r_nv, R_nv = r_all.loc[showdate, j, 'Naver', ['num_r', 'R']].values

        # Insertion order below fixes the order in which new columns appear.
        updates = {
            'days_since_rel': (showdate - reldate).days,
            'R_dm': R_dm,
            'num_r_dm': num_r_dm,
            'R_nv': R_nv,
            'num_r_nv': num_r_nv,
            'd_dc': R_dm - R_star,
            'd_dd': R_dm - R_star_dm,
            'd_nc': R_nv - R_star,
            'd_nn': R_nv - R_star_nv,
            'R*': R_star,
            'R*_dm': R_star_dm,
            'R*_nv': R_star_nv,
        }
        for column, value in updates.items():
            partial_panel.at[row.Index, column] = value

    return partial_panel
import multiprocessing
from functools import partial
from multiprocessing_cal_d import cal_d
if __name__ == "__main__":
    # The guard is required by multiprocessing: under the spawn/forkserver
    # start methods the worker processes re-import this module, and an
    # unguarded Pool() at import time would try to start processes again.
    #
    # NOTE(review): panel, nvid_jt, movchr and r_all are not defined in the
    # visible code; they must already exist when this point is reached.

    # Bind the shared lookup data so that pool.map only has to pass the
    # sub-panel as the single positional argument of cal_d.
    cal_d_partial = partial(cal_d, nvid_jt=nvid_jt, movchr=movchr, r_all=r_all)

    # One partition fewer than the CPU count, but never fewer than one
    # (cpu_count() - 1 would be 0 on a single-core machine and make the
    # split raise).
    partitions = max(1, multiprocessing.cpu_count() - 1)

    # Split row positions instead of the DataFrame itself: same chunk sizes
    # as np.array_split(panel, partitions), without routing the frame
    # through the deprecated DataFrame.swapaxes.
    row_groups = np.array_split(np.arange(len(panel)), partitions)
    subpanels = [panel.iloc[rows] for rows in row_groups]

    # Pool() with no argument starts one worker per CPU reported by the OS.
    # The with-block tears the pool down even if a worker raises.
    with multiprocessing.Pool() as pool:
        panel_new = pd.concat(pool.map(cal_d_partial, subpanels))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment