Skip to content

Instantly share code, notes, and snippets.

@neerajvashistha
Created March 4, 2021 01:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save neerajvashistha/2f2966ca08c717340ab142eccc9685d0 to your computer and use it in GitHub Desktop.
Save neerajvashistha/2f2966ca08c717340ab142eccc9685d0 to your computer and use it in GitHub Desktop.
Simple multiprocessing based on a list
import glob
from multiprocessing import Pool, freeze_support
def multicore_function(function_name, alist, wfname, n_chunks=1000, processes=4):
    '''
    Split alist into chunks and hand every chunk to a worker of a process Pool.

    Arguments:
    1. function_name: the callable run on each chunk. It is called with one
       argument of the form ['<wfname>_<i>.txt', [item, item, ...]].
       Pool.map sends it to the workers, so it has to be picklable
       (for example a module-level function).
    2. alist: list of data items that is distributed over the chunks.
    3. wfname: prefix used to build the per-chunk output file name.
    4. n_chunks: maximum number of chunks to build. Defaults to 1000, the
       value that used to be hard-coded. Must be at least 1.
    5. processes: number of worker processes in the Pool (default 4).

    Returns the list of values returned by function_name, one per non-empty
    chunk, in chunk order. An empty alist returns [] without starting a Pool.

    Raises ValueError when n_chunks is smaller than 1.
    '''
    # Only has an effect in a frozen Windows executable; harmless otherwise.
    freeze_support()

    if n_chunks < 1:
        raise ValueError("n_chunks must be at least 1")

    def chunkify(wfname, lst, n):
        '''
        Build up to n work items of the form [output file name, sub-list].

        Item i receives lst[i], lst[i + n], lst[i + 2n], ... and is named
        '<wfname>_<i>.txt'. Sub-lists that would be empty are left out.
        '''
        rt = []
        for i in range(n):
            sub_list = lst[i::n]
            if sub_list:
                rt.append([wfname + '_' + str(i) + '.txt', sub_list])
        return rt

    parts = chunkify(wfname, alist, n_chunks)
    if not parts:
        # Nothing to do: do not pay for starting worker processes.
        return []

    # Map the worker function over the chunks; the Pool is closed on exit.
    with Pool(processes=processes) as pool:
        return pool.map(function_name, parts)
def process_pdf_list(alisted_file):
    '''
    Convert a batch of input files to text lines and write them to one file.

    alisted_file = ['out_filename_0.txt', ['in_file1', ... ,'in_file100']]
    The first element is the output file, which is created or overwritten
    as UTF-8 text. The second element is the list of input files; each one
    that is processed without error contributes one line to the output.

    A failure on a single input file is reported and that file is skipped,
    so the remaining files are still processed.

    NOTE(review): converter.pdf_reader and line_creator_new are not defined
    in this file; they have to be provided by the surrounding module.
    '''
    # logging is not imported at module level in this file.
    import logging

    out_file = alisted_file[0]
    listed = alisted_file[1]
    written = 0
    with open(out_file, 'w', encoding="utf-8") as fw:
        for file_name in listed:
            try:
                text = converter.pdf_reader(file_name)
                line = line_creator_new(text)
                # The file is opened with encoding="utf-8", so the text is
                # written as-is; no manual encode/decode round trip needed.
                fw.write(line + '\n')
            except Exception as exc:
                # Best effort per file, but let KeyboardInterrupt/SystemExit
                # propagate instead of swallowing them with a bare except.
                print("error in reading " + str(file_name) + ": " + str(exc))
            else:
                written += 1
    logging.info("%d files processed and written to %s", written, out_file)
def main():
    '''
    Gather every '*.pdf' file of the current directory and pass the list to
    multicore_function, with process_pdf_list as the per-chunk worker and
    "out_filename" as the output file prefix.
    '''
    pdf_paths = glob.glob('*.pdf')
    multicore_function(
        process_pdf_list,
        pdf_paths,
        "out_filename",
    )
if __name__ == '__main__':
    # The multiprocessing documentation asks for freeze_support() to be
    # called straight after the __main__ guard, so that a frozen Windows
    # executable can start its worker processes. It does nothing otherwise.
    freeze_support()
    main()
@neerajvashistha
Copy link
Author

import multiprocessing
def createpdf(data):
    """Return a (text, status) pair for one data item.

    The text is "This is my pdf data: <data>\\n", where <data> is str(data);
    the second element is always 0.

    data is wrapped in a 1-tuple before formatting so that a tuple argument
    is rendered as a whole instead of being unpacked by the % operator
    (which raised TypeError for tuples whose length is not 1).
    """
    return ("This is my pdf data: %s\n" % (data,), 0)


# Sample input: one "pdf" text is produced per item.
data = ["My data", "includes", "strings and", "numbers like", 42, "and", 3.14]
number_of_processes = 5

# Creating the Pool at import time breaks with the spawn/forkserver start
# methods, where every worker re-imports this module; keep it behind the
# __main__ guard. The with-block also shuts the worker processes down.
if __name__ == '__main__':
    with multiprocessing.Pool(number_of_processes) as pool:
        # Each result is the (text, status) pair returned by createpdf.
        results = pool.map(createpdf, data)
    outputs = [result[0] for result in results]
    pdfoutput = "".join(outputs)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment