Skip to content

Instantly share code, notes, and snippets.

@sagarkar10
Created July 14, 2019 18:10
Show Gist options
  • Save sagarkar10/5d877ec8eec758842bca2f2d7a2fb5b4 to your computer and use it in GitHub Desktop.
Save sagarkar10/5d877ec8eec758842bca2f2d7a2fb5b4 to your computer and use it in GitHub Desktop.
Comparison between Python MultiProcessing and MultiThreading
import os
import math
import time
import sys
from multiprocessing import Process
from threading import Thread
from multiprocessing.pool import ThreadPool as TPool
from multiprocessing import Pool as MPool
from loguru import logger
def costly_cpu_op(index):
    """CPU-bound benchmark op: square-root every integer in a large range.

    `index` only labels the log line so runs can be told apart.
    """
    start = time.time()
    for n in range(1, 100000000):
        math.sqrt(n)
    elapsed = time.time() - start
    logger.info(f"{costly_cpu_op.__name__} at index:{index} took:{str(elapsed)} sec")
def costly_io_op(index):
    """I/O-bound benchmark op: read a large CSV file in one shot.

    Assumes `train-balanced-sarcasm.csv` exists in the working directory.
    `index` only labels the log line so runs can be told apart.
    """
    start = time.time()
    with open("train-balanced-sarcasm.csv", "r") as fh:
        fh.read()
    elapsed = time.time() - start
    logger.info(f"{costly_io_op.__name__} at index:{index} took:{str(elapsed)} sec")
def run_simple_processing(func, count):
    """Baseline: call `func(i)` sequentially for i in [0, count) and log total time."""
    start = time.time()
    for i in range(count):
        func(i)
    logger.info(f"Time for simple-processing of {count} ops of {func.__name__} took :{time.time()-start} sec")
def run_multiprocessing(func, count):
    """Run `func(i)` in `count` separate OS processes; start all, then join all."""
    start = time.time()
    workers = [Process(target=func, args=(i,)) for i in range(count)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info(f"Time for multi-processing of {count} ops of {func.__name__} took :{time.time()-start} sec")
def run_multithreading(func, count):
    """Run `func(i)` on `count` threads in one process; start all, then join all."""
    start = time.time()
    workers = [Thread(target=func, args=(i,)) for i in range(count)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info(f"Time for multi-threading of {count} ops of {func.__name__} took :{time.time()-start} sec")
def run_pool_multiprocessing(func, count):
    """Run `func` over range(count) on a process pool (one worker per CPU core).

    Fixes two defects in the original:
    - `map_async` discarded its AsyncResult, so exceptions raised inside workers
      were silently lost; blocking `Pool.map` re-raises them in the parent.
    - The pool is now managed by a `with` block, which guarantees it is torn
      down even if a worker raises.
    """
    with MPool(os.cpu_count()) as pool:
        ts = time.time()
        # Blocking map: returns only after every task has finished (or raised).
        pool.map(func, range(count))
    logger.info(f"Time for pool-multi-processing of {count} ops of {func.__name__} took :{time.time()-ts} sec")
def run_pool_multithreading(func, count):
    """Run `func` over range(count) on a thread pool (one thread per CPU core).

    Fixes two defects in the original:
    - `map_async` discarded its AsyncResult, so exceptions raised inside worker
      threads were silently lost; blocking `map` re-raises them in the caller.
    - The pool is now managed by a `with` block, which guarantees it is torn
      down even if a worker raises.
    """
    with TPool(os.cpu_count()) as pool:
        ts = time.time()
        # Blocking map: returns only after every task has finished (or raised).
        pool.map(func, range(count))
    logger.info(f"Time for pool-multi-threading of {count} ops of {func.__name__} took :{time.time()-ts} sec")
if __name__ == "__main__":
    # CLI run-type key -> runner function.
    run_mapper = {
        "simple": run_simple_processing,
        "mt": run_multithreading,
        "pmt": run_pool_multithreading,
        "mp": run_multiprocessing,
        "pmp": run_pool_multiprocessing,
    }
    # CLI workload key -> benchmark op.
    func_mapper = {
        "cpu": costly_cpu_op,
        "io": costly_io_op,
    }
    # Fail with a usage message instead of a raw IndexError/KeyError on bad args.
    if len(sys.argv) < 3 or sys.argv[1] not in run_mapper or sys.argv[2] not in func_mapper:
        sys.exit(f"usage: {sys.argv[0]} {{{'|'.join(run_mapper)}}} {{{'|'.join(func_mapper)}}}")
    run_type = run_mapper[sys.argv[1]]
    func_type = func_mapper[sys.argv[2]]
    # One op per CPU core, so the pool variants get exactly one task per worker.
    count = os.cpu_count()
    logger.add(f"logs/log_{sys.argv[1]}_{sys.argv[2]}_count_{count}", serialize=True, enqueue=True)
    logger.info(f"Running {run_type.__name__} for {func_type.__name__} with count :{count}")
    # Bug fix: the original re-evaluated os.cpu_count() here instead of using
    # the `count` it just computed and logged — same value, but inconsistent.
    run_type(func_type, count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment