ThomasParistech/multi_process.py

## multi_process.py
# /usr/bin/python3
"""Process files in parallel."""
import math
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

import numpy as np
from mpire import WorkerPool
from tqdm import tqdm

from num_jobs import get_num_jobs

# Scalar value types allowed as keyword-argument values (and as keys/values
# of a nested dict argument).
_PrimitiveType = Union[str, float, int, bool]
# Keyword arguments handed to a process function: maps an argument name to a
# scalar, a numpy array, or a flat dict of scalars.
KwargsType = Dict[str, Union[_PrimitiveType, np.ndarray,
                             Dict[_PrimitiveType, _PrimitiveType]]]


def _batch_func(process_func: Callable[..., None],
                list_kwargs: List[KwargsType],
                shared_kwargs: KwargsType) -> None:
    """Process a batch"""
    for kwargs in list_kwargs:
        process_func(**kwargs, **shared_kwargs)


def multiprocess(process_func: Callable[..., None],
                 list_kwargs: List[KwargsType],
                 shared_kwargs: Optional[KwargsType] = None,
                 bytes_per_process: Optional[int] = None) -> None:
    """
    Parallelize the process of a given function on a list of inputs

    Each entry of ``list_kwargs`` triggers one call
    ``process_func(**entry, **shared_kwargs)``. The number of jobs comes from
    ``get_num_jobs(bytes_per_process)``: with a single job the entries are
    processed sequentially in the current process behind a tqdm progress bar,
    otherwise they are split into batches that are dispatched to an mpire
    ``WorkerPool`` (batches complete in no particular order).

    Args:
        process_func: Process function to run in parallel on the inputs
        list_kwargs: List of process-specific keyword arguments, e.g. filenames
        shared_kwargs: Keyword arguments common to all processes, e.g.
          hyperparameters. None is treated as an empty dict
        bytes_per_process: Optional estimation of the memory required by each
          individual process, forwarded to ``get_num_jobs``
    """
    if shared_kwargs is None:
        shared_kwargs = {}

    num_jobs = get_num_jobs(bytes_per_process)

    if num_jobs == 1:
        for kwargs in tqdm(list_kwargs):
            process_func(**kwargs, **shared_kwargs)
        return

    n_process = len(list_kwargs)
    if n_process == 0:
        # Nothing to process. Without this guard batch_size would be 0 and
        # range(0, 0, 0) below would raise ValueError (zero step).
        return

    # Chunk the list of arguments into N approximately equal batches
    batch_size = math.ceil(n_process / num_jobs)
    params = [(process_func, list_kwargs[i: i + batch_size], shared_kwargs)
              for i in range(0, n_process, batch_size)]

    with WorkerPool(n_jobs=num_jobs) as pool:
        # Each tuple of params is unpacked into the arguments of _batch_func
        pool.map_unordered(_batch_func, params, progress_bar=True)
	# /usr/bin/python3
	"""Process files in parallel."""
	import math
	from typing import Callable
	from typing import Dict
	from typing import List
	from typing import Optional
	from typing import Union

	import numpy as np
	from mpire import WorkerPool
	from tqdm import tqdm

	from num_jobs import get_num_jobs

	# Scalar value types allowed as keyword-argument values (and as keys/values
	# of a nested dict argument).
	_PrimitiveType = Union[str, float, int, bool]
	# Keyword arguments handed to a process function: maps an argument name to a
	# scalar, a numpy array, or a flat dict of scalars.
	KwargsType = Dict[str, Union[_PrimitiveType, np.ndarray,
	Dict[_PrimitiveType, _PrimitiveType]]]


	def _batch_func(process_func: Callable[..., None],
	list_kwargs: List[KwargsType],
	shared_kwargs: KwargsType) -> None:
	"""Process a batch"""
	for kwargs in list_kwargs:
	process_func(kwargs, shared_kwargs)


	def multiprocess(process_func: Callable[..., None],
	                 list_kwargs: List[KwargsType],
	                 shared_kwargs: Optional[KwargsType] = None,
	                 bytes_per_process: Optional[int] = None) -> None:
	    """
	    Parallelize the process of a given function on a list of inputs

	    Each entry of ``list_kwargs`` triggers one call
	    ``process_func(**entry, **shared_kwargs)``. The number of jobs comes from
	    ``get_num_jobs(bytes_per_process)``: with a single job the entries are
	    processed sequentially in the current process behind a tqdm progress bar,
	    otherwise they are split into batches that are dispatched to an mpire
	    ``WorkerPool`` (batches complete in no particular order).

	    Args:
	        process_func: Process function to run in parallel on the inputs
	        list_kwargs: List of process-specific keyword arguments, e.g. filenames
	        shared_kwargs: Keyword arguments common to all processes, e.g.
	          hyperparameters. None is treated as an empty dict
	        bytes_per_process: Optional estimation of the memory required by each
	          individual process, forwarded to ``get_num_jobs``
	    """
	    if shared_kwargs is None:
	        shared_kwargs = {}

	    num_jobs = get_num_jobs(bytes_per_process)

	    if num_jobs == 1:
	        for kwargs in tqdm(list_kwargs):
	            # Unpack both dicts as keyword arguments of the same call
	            process_func(**kwargs, **shared_kwargs)
	        return

	    n_process = len(list_kwargs)
	    if n_process == 0:
	        # Nothing to process. Without this guard batch_size would be 0 and
	        # range(0, 0, 0) below would raise ValueError (zero step).
	        return

	    # Chunk the list of arguments into N approximately equal batches
	    batch_size = math.ceil(n_process / num_jobs)
	    params = [(process_func, list_kwargs[i: i + batch_size], shared_kwargs)
	              for i in range(0, n_process, batch_size)]

	    with WorkerPool(n_jobs=num_jobs) as pool:
	        # Each tuple of params is unpacked into the arguments of _batch_func
	        pool.map_unordered(_batch_func, params, progress_bar=True)