# djinn/document_processing.py

## document_processing.py
#!/usr/bin/env python3
# Author: Supreet Sethi <supreet.sethi@gmail.com>
# Dated: 16/10/2020
# Please use it as a working prototype.
# A lot more can be done from a process-management and general housekeeping perspective.


from multiprocessing import Pool, cpu_count, Manager
from collections import namedtuple
from random import choices, choice
from time import sleep


class Task:
	"""Unit of work pairing a source identifier with a destination identifier."""

	def __init__(self, source, destination):
		# Both values are stored exactly as given; nothing is validated or copied.
		self.source, self.destination = source, destination


def work(task):
	"""Announce the task being handled, then pause for one second.

	`task` must expose `source` and `destination` attributes. The sleep is
	presumably a stand-in for real processing. Returns None.
	"""
	src, dest = task.source, task.destination
	print("Processing src=%s, dest=%s" % (src, dest))
	sleep(1)


# No reason to setup a pool of processes larger than available CPUs
def num_cpu():
	"""Return the number of CPUs to size the worker pool with.

	Prefers the number of CPUs the current process is allowed to run on
	(its scheduler affinity set), because `cpu_count()` reports every CPU in
	the machine even when the process is restricted to a subset. Falls back
	to `cpu_count()` where affinity is unavailable, and to 1 when the count
	cannot be determined at all.

	Returns:
		int: a value of at least 1.
	"""
	import os

	try:
		# Only available on some platforms; AttributeError elsewhere.
		return len(os.sched_getaffinity(0))
	except (AttributeError, OSError):
		pass
	try:
		return cpu_count()
	except NotImplementedError:
		# multiprocessing.cpu_count() raises when the count is unknown.
		return 1


def create_tasks(num=10000):
	"""Lazily generate `num` Task objects with random source/destination labels.

	Each label is one lowercase ASCII letter followed by two decimal digits.
	Tasks are built one at a time as the generator is consumed; a `num` of
	zero or less yields nothing.
	"""
	letters = 'abcdefghijklmnopqrstuvwxyz'
	digits = '0123456789'

	def random_label():
		# The letter is drawn before the two digits.
		return choice(letters) + ''.join(choices(digits, k=2))

	for _ in range(num):
		# Arguments evaluate left to right: source label first, then destination.
		yield Task(random_label(), random_label())


if __name__ == '__main__':
	# Query the CPU count once and reuse it for the pool size.
	cpus = num_cpu()
	# The context manager shuts the worker processes down when the block exits,
	# instead of leaving the pool open. map() blocks until every task has been
	# processed, so no work is cut short by the shutdown.
	with Pool(processes=cpus) as p:
		p.map(work, create_tasks())
	#!/usr/bin/env python3
	# Author: Supreet Sethi <supreet.sethi@gmail.com>
	# Dated: 16/10/2020
	# Please use it as a working prototype.
	# A lot more can be done from a process-management and general housekeeping perspective.



	from multiprocessing import Pool, cpu_count, Manager
	from collections import namedtuple
	from random import choices, choice
	from time import sleep


	# NOTE(review): this re-declares the Task class defined earlier in the file;
	# it looks like a paste artifact -- confirm whether this copy is needed.
	class Task(object):
		"""Unit of work pairing a source identifier with a destination identifier."""

		def __init__(self, source, destination):
			# Both values are stored exactly as given.
			self.source = source
			self.destination = destination



	# NOTE(review): this re-declares work() defined earlier in the file;
	# it looks like a paste artifact -- confirm whether this copy is needed.
	def work(task):
		"""Print the task's source and destination, then sleep for one second.

		`task` must expose `source` and `destination` attributes. Returns None.
		"""
		print("Processing src=%s, dest=%s" % (task.source, task.destination))
		sleep(1)
		return



	# No reason to setup a pool of processes larger than available CPUs
	# NOTE(review): this re-declares num_cpu() defined earlier in the file;
	# it looks like a paste artifact -- confirm whether this copy is needed.
	def num_cpu():
		"""Return the number of CPUs to size the worker pool with.

		Prefers the number of CPUs the current process may run on (its
		scheduler affinity set), falls back to `cpu_count()` where affinity is
		unavailable, and to 1 when the count cannot be determined.

		Returns:
			int: a value of at least 1.
		"""
		import os

		try:
			# Only available on some platforms; AttributeError elsewhere.
			return len(os.sched_getaffinity(0))
		except (AttributeError, OSError):
			pass
		try:
			return cpu_count()
		except NotImplementedError:
			# multiprocessing.cpu_count() raises when the count is unknown.
			return 1


	# NOTE(review): this re-declares create_tasks() defined earlier in the file;
	# it looks like a paste artifact -- confirm whether this copy is needed.
	def create_tasks(num=10000):
		"""Lazily generate `num` Task objects with random source/destination labels.

		Each label is one lowercase ASCII letter followed by two decimal
		digits. A `num` of zero or less yields nothing.
		"""
		bets = 'abcdefghijklmnopqrstuvwxyz'
		nums = '0123456789'

		def tsk(n):
			# `n` is the task index; it is accepted but not used in the labels.
			src = choice(bets) + ''.join(choices(nums, k=2))
			dest = choice(bets) + ''.join(choices(nums, k=2))
			t = Task(src, dest)
			return t

		for i in range(num):
			yield tsk(i)




	# NOTE(review): this repeats the entry-point block found earlier in the file;
	# it looks like a paste artifact -- confirm whether a second run is intended.
	if __name__ == '__main__':
		# Query the CPU count once and reuse it for the pool size.
		cpus = num_cpu()
		# The context manager shuts the worker processes down when the block
		# exits; map() blocks until every task has been processed.
		with Pool(processes=cpus) as p:
			p.map(work, create_tasks())