Skip to content

Instantly share code, notes, and snippets.

@r5v9
Created October 18, 2017 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save r5v9/472e0c75bbd651ed49b9d311a2f52509 to your computer and use it in GitHub Desktop.
Save r5v9/472e0c75bbd651ed49b9d311a2f52509 to your computer and use it in GitHub Desktop.
import multiprocessing
from multiprocessing import Pool
import pandas as pd
import numpy as np
def apply_parallel(df, func, cores=None):
    """Apply ``func`` to row-wise partitions of ``df`` in parallel.

    ``df`` is cut into contiguous, near-equal positional slices, ``func`` is
    called once per slice in a worker process, and the per-slice results are
    concatenated with ``pd.concat`` in the original row order.

    Args:
        df: pandas object to partition along its rows (sliced with ``iloc``).
        func: callable taking one partition and returning a pandas object.
            When more than one partition is used it is sent to worker
            processes, so it must be picklable (e.g. a module-level function).
        cores: maximum number of worker processes. Defaults to
            ``multiprocessing.cpu_count()``.

    Returns:
        ``func(df)`` computed in-process when a single partition suffices
        (empty input, one row, or ``cores == 1``); otherwise the
        concatenation of ``func`` applied to every partition.

    Raises:
        ValueError: if ``cores`` is given and is less than 1.
    """
    if cores is None:
        cores = multiprocessing.cpu_count()
    elif cores < 1:
        raise ValueError("cores must be a positive integer")

    n_rows = len(df)
    # Never create more partitions than rows: empty partitions only add
    # process overhead and hand ``func`` frames with nothing in them.
    n_parts = min(cores, n_rows)
    if n_parts <= 1:
        # Nothing to parallelize. Work on a copy so ``func`` is isolated from
        # the caller's object, as it is when partitions go to worker processes.
        return func(df.copy())

    # Contiguous positional slices; the first ``extra`` slices get one more
    # row. Slicing with ``iloc`` keeps each partition a pandas object without
    # relying on how NumPy's array_split treats DataFrames.
    base, extra = divmod(n_rows, n_parts)
    partitions = []
    start = 0
    for i in range(n_parts):
        stop = start + base + (1 if i < extra else 0)
        partitions.append(df.iloc[start:stop])
        start = stop

    with Pool(n_parts) as pool:
        # pool.map preserves input order, so concat restores the row order.
        return pd.concat(pool.map(func, partitions))
def func_apply_row(row):
    """Placeholder per-row function: ignore ``row`` and return a constant.

    Replace the body with the real per-row computation; whatever it returns
    becomes that row's value in the result of ``DataFrame.apply(..., axis=1)``.
    """
    return "whatever"
def apply_row(df):
    """Apply ``func_apply_row`` to every row of ``df``.

    Args:
        df: object whose ``apply`` method is called with ``axis=1``, so the
            function receives one row at a time.

    Returns:
        Whatever ``df.apply(func_apply_row, axis=1)`` returns.
    """
    return df.apply(func_apply_row, axis=1)
if __name__ == "__main__":
    # Guard the entry point: apply_parallel starts a multiprocessing Pool, and
    # under the spawn/forkserver start methods each worker re-imports this
    # module. Without the guard that re-import would start pools recursively.
    # NOTE(review): `df` is not defined anywhere in this snippet; build or
    # load the DataFrame before this line.
    df['some_col'] = apply_parallel(df, apply_row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment