Last active
June 8, 2023 14:23
-
-
Save joonro/0247661b0baf8ba380d5b5c54807eda7 to your computer and use it in GitHub Desktop.
[General Multiprocessing of pandas.DataFrame with multiple arguments] Using multiple arguments, splitting pandas.DataFrame #python #multiprocessing #pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
def cal_d(partial_panel, nvid_jt, movchr, r_all):
    """Add rating and rating-gap columns to ``partial_panel``, row by row.

    For each row, the row's ``showdate`` and ``nvid`` are used to look up
    the movie's ``reldate``, ``R*``, ``R*_dm`` and ``R*_nv`` in ``movchr``
    and the ``num_r`` / ``R`` entries of the 'Daum' and 'Naver' sources in
    ``r_all``.  The looked-up values, the differences between them and the
    number of days between ``reldate`` and ``showdate`` are written back
    into the row.

    Parameters
    ----------
    partial_panel : pandas.DataFrame
        Must provide ``showdate`` and ``nvid`` columns.  It is modified in
        place; the new columns are ``days_since_rel``, ``R_dm``,
        ``num_r_dm``, ``R_nv``, ``num_r_nv``, ``d_dc``, ``d_dd``, ``d_nc``,
        ``d_nn``, ``R*``, ``R*_dm`` and ``R*_nv``.
    nvid_jt
        Looked up with ``nvid_jt.loc[showdate]``; the position of the first
        entry equal to the row's ``nvid`` becomes the key ``j``.
    movchr
        Looked up with ``movchr.at[nvid, column]`` for the columns
        ``reldate``, ``R*``, ``R*_dm`` and ``R*_nv``.
    r_all
        Must support ``r_all.loc[showdate, j, source, ['num_r', 'R']]`` and
        yield exactly two values, in that order.
        NOTE(review): presumably a Series with a 4-level MultiIndex --
        confirm against the code that builds it.

    Returns
    -------
    pandas.DataFrame
        The same ``partial_panel`` object that was passed in.

    Raises
    ------
    IndexError
        If the row's ``nvid`` does not occur in ``nvid_jt.loc[showdate]``.
    """
    for row in partial_panel.itertuples():
        showdate = pd.Timestamp(str(row.showdate))
        nvid = row.nvid

        reldate = movchr.at[nvid, 'reldate']
        R_star = movchr.at[nvid, 'R*']
        R_star_dm = movchr.at[nvid, 'R*_dm']
        R_star_nv = movchr.at[nvid, 'R*_nv']

        # Position of the first match for this nvid on the show date; it is
        # the second key of the r_all lookups below.
        j = np.where(nvid_jt.loc[showdate] == nvid)[0][0]

        # The 'Combined' source is deliberately not read here: none of the
        # output columns uses it, so looking it up would be wasted work.
        num_r_dm, R_dm = r_all.loc[showdate, j, 'Daum', ['num_r', 'R']].values
        num_r_nv, R_nv = r_all.loc[showdate, j, 'Naver', ['num_r', 'R']].values

        # Insertion order of this dict fixes the order in which the new
        # columns are created on the first row.
        new_values = {
            'days_since_rel': (showdate - reldate).days,
            'R_dm': R_dm,
            'num_r_dm': num_r_dm,
            'R_nv': R_nv,
            'num_r_nv': num_r_nv,
            'd_dc': R_dm - R_star,
            'd_dd': R_dm - R_star_dm,
            'd_nc': R_nv - R_star,
            'd_nn': R_nv - R_star_nv,
            'R*': R_star,
            'R*_dm': R_star_dm,
            'R*_nv': R_star_nv,
        }
        for column, value in new_values.items():
            partial_panel.at[row.Index, column] = value

    return partial_panel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
from functools import partial | |
from multiprocessing_cal_d import cal_d | |
# Fan cal_d out over a process pool: the panel is cut into row-wise
# sub-frames, each worker fills in one sub-frame, and the results are
# concatenated back into a single frame.
#
# NOTE(review): under the "spawn" start method the main module is
# re-imported in every worker, so as a standalone script this code should
# run under an ``if __name__ == "__main__":`` guard.
pool = multiprocessing.Pool()  # defaults to the number of CPUs reported by the OS

# Bind the shared lookup data as keyword arguments so that pool.map only has
# to supply the sub-frame as the single positional argument of cal_d.
cal_d_partial = partial(cal_d, nvid_jt=nvid_jt, movchr=movchr, r_all=r_all)

# cpu_count() - 1 is 0 on a single-core machine, and np.array_split rejects
# a section count of 0, so never go below one partition.
partitions = max(1, multiprocessing.cpu_count() - 1)
subpanels = np.array_split(panel, partitions)

try:
    # pool.map returns results in input order, so the concatenated frame
    # keeps the row order of ``panel``.
    panel_new = pd.concat(pool.map(cal_d_partial, subpanels))
finally:
    # Shut the workers down even if a task raised, and wait for them to
    # exit so no worker process is left behind.
    pool.close()
    pool.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment