Skip to content

Instantly share code, notes, and snippets.

@mlgill
Forked from yong27/apply_df_by_multiprocessing.py
Last active August 29, 2015 14:23
Show Gist options
  • Save mlgill/c8ad23cc6d88d1178abe to your computer and use it in GitHub Desktop.
Save mlgill/c8ad23cc6d88d1178abe to your computer and use it in GitHub Desktop.
import multiprocessing
import pandas as pd
import numpy as np
def _apply_df(args):
    """Apply a function to one DataFrame chunk inside a worker process.

    ``Pool.map`` passes each task a single argument, so the chunk, the
    function and its keyword arguments travel together as one tuple.

    Args:
        args: A ``(df, func, kwargs)`` tuple -- the chunk to process, the
            callable handed to ``df.apply`` and a dict of keyword arguments
            forwarded to ``df.apply``.

    Returns:
        Whatever ``df.apply(func, **kwargs)`` returns for the chunk.
    """
    df, func, kwargs = args
    return df.apply(func, **kwargs)
def apply_by_multiprocessing(df, func, **kwargs):
    """Run ``df.apply(func, **kwargs)`` on row chunks in parallel processes.

    The rows of ``df`` are split into up to ``workers`` contiguous chunks,
    each chunk is processed by ``_apply_df`` in a worker process, and the
    per-chunk results are concatenated in the original row order.

    Args:
        df: The pandas object to process (split along its rows).
        func: Callable forwarded to ``apply``. When a pool is used it is
            sent to the worker processes, so it must be picklable.
        **kwargs: Keyword arguments forwarded to ``apply`` (e.g. ``axis=1``).
            The special key ``workers`` is removed first and sets the number
            of processes; it defaults to ``multiprocessing.cpu_count()``.

    Returns:
        The concatenation of the per-chunk ``apply`` results.

    Raises:
        ValueError: If ``workers`` is smaller than 1.
    """
    workers = kwargs.pop('workers', None)
    if workers is None:
        workers = multiprocessing.cpu_count()
    if workers < 1:
        raise ValueError('workers must be at least 1')

    # Split row *positions* rather than the frame itself: this yields the
    # same chunk layout as np.array_split(df, workers) without relying on
    # numpy being able to slice a pandas object.
    positions = np.array_split(np.arange(len(df)), workers)
    # Drop empty chunks (workers > number of rows): applying to an empty
    # frame only costs a process and muddies the concatenated result.
    chunks = [df.iloc[int(pos[0]):int(pos[-1]) + 1]
              for pos in positions if len(pos)]

    # Nothing to parallelise with zero or one chunk -- skip the pool.
    if len(chunks) <= 1:
        return df.apply(func, **kwargs)

    pool = multiprocessing.Pool(processes=len(chunks))
    try:
        results = pool.map(_apply_df,
                           [(chunk, func, kwargs) for chunk in chunks])
    finally:
        # Always release the worker processes, even if a task raised.
        pool.close()
        pool.join()
    return pd.concat(results)
def square(x):
    """Return ``x`` squared.

    Works for anything that supports ``**``, including a pandas row or
    column, in which case the result is element-wise.
    """
    # The original computed x**x (x raised to itself), not the square.
    return x ** 2
if __name__ == '__main__':
    # Demo: apply `square` to every row of a small two-column frame.
    df = pd.DataFrame({'a': range(10), 'b': range(10)})
    # Run by 4 processors.
    result = apply_by_multiprocessing(df, square, axis=1, workers=4)
    # Show the outcome; the original computed it and threw it away.
    print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment