Skip to content

Instantly share code, notes, and snippets.

@jmoiron
Created September 24, 2010 21:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jmoiron/596073 to your computer and use it in GitHub Desktop.
Save jmoiron/596073 to your computer and use it in GitHub Desktop.
# Parallelize a function n ways, automatically splitting a big list
# of arguments into n roughly equally sized groups.
import math
from multiprocessing import Pool
def split(iterable, n):
    """Split ``iterable`` into ``n`` contiguous, roughly equally sized groups.

    The first ``len(iterable) % n`` groups receive one extra item, so group
    sizes never differ by more than one. When there are fewer items than
    groups, the trailing groups are empty.

    Args:
        iterable: A sliceable sequence (list, tuple, str, ...). Inputs that
            have no ``len()``, such as generators, are materialized into a
            list first.
        n: Number of groups to produce; must be at least 1.

    Returns:
        A list of ``n`` groups in input order. Each group is a slice of the
        input, so it has the input's own sequence type (or is a list when the
        input had to be materialized).

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    try:
        total = len(iterable)
    except TypeError:
        # One-shot iterables (generators, iterators) support neither len()
        # nor slicing, so copy them into a list before partitioning.
        iterable = list(iterable)
        total = len(iterable)
    # divmod keeps the arithmetic in exact integers (no float rounding).
    base, extra = divmod(total, n)
    groups = []
    start = 0
    for index in range(n):
        # The first `extra` groups absorb the remainder, one item each.
        stop = start + base + (1 if index < extra else 0)
        groups.append(iterable[start:stop])
        start = stop
    return groups
def parallelize(n, function, args):
    """Run ``function`` over ``args`` using ``n`` worker processes.

    ``args`` is divided into ``n`` groups with ``split`` and each group is
    passed to a separate ``function`` call, so ``function`` must accept a
    list of arguments and operate over them all, preferably dealing with
    each item in isolation.

    Args:
        n: Number of worker processes, and of groups ``args`` is split into.
        function: Callable taking one group of arguments. It is dispatched
            through ``multiprocessing.Pool.apply_async``.
        args: The arguments to distribute across the workers.

    Returns:
        A list with one result per group, in group order.
    """
    pool = Pool(n)
    try:
        arg_groups = split(args, n)
        pending = [
            pool.apply_async(function, (group,)) for group in arg_groups
        ]
        # get() blocks until the task has finished and re-raises any
        # exception the worker raised.
        results = [task.get() for task in pending]
    finally:
        # Always shut the pool down; otherwise its worker processes stay
        # alive after every call, including calls that fail.
        pool.close()
        pool.join()
    return results
def auto_parallelize(function, args):
    """Parallelize ``function`` over ``args`` using one worker per CPU core.

    Args:
        function: Callable taking a list of arguments; forwarded unchanged
            to ``parallelize``.
        args: The arguments to distribute; forwarded unchanged to
            ``parallelize``.

    Returns:
        Whatever ``parallelize`` returns for ``multiprocessing.cpu_count()``
        workers.
    """
    # Imported here because the module only does
    # ``from multiprocessing import Pool``; referring to
    # ``multiprocessing.cpu_count`` directly would raise NameError.
    from multiprocessing import cpu_count

    return parallelize(cpu_count(), function, args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment