Skip to content

Instantly share code, notes, and snippets.

@dcollien
Created August 18, 2014 09:17
Show Gist options
  • Save dcollien/0c3827d1ac21995469f0 to your computer and use it in GitHub Desktop.
Save dcollien/0c3827d1ac21995469f0 to your computer and use it in GitHub Desktop.
Load text files and reverse their lines, running 4 processes at a time
import os
from multiprocessing import Pool
# Number of worker processes requested when the processing pool is created.
NUM_PROCESSES = 4
# Only paths ending with this suffix are accepted as input files.
ALLOWED_EXTENSION = '.txt'
# Suffix appended to an input file's base name to form its output file name.
NEW_EXTENSION = '.reversed.txt'
# Directory that receives the output files: 'output' under the working
# directory, resolved once when this module is loaded.
OUTPUT_DIR = os.path.join(os.getcwd(), 'output')
def processFile(file):
    """Write a copy of ``file`` to OUTPUT_DIR with each line's text reversed.

    This is the unit of work delegated to the worker processes. The output
    file is named after the input's base name with NEW_EXTENSION appended
    (the original extension is kept, e.g. ``a.txt`` -> ``a.txt.reversed.txt``).

    Only the content of each line is reversed; the line terminator stays at
    the end of the line so the output keeps the same line structure as the
    input. A final line without a terminator is written without one.

    Args:
        file: Path of the text file to read.

    Returns:
        The path of the file that was written.
    """
    fileName = os.path.basename(file) + NEW_EXTENSION
    outputPath = os.path.join(OUTPUT_DIR, fileName)

    with open(file, 'r') as inputFile:
        with open(outputPath, 'w') as outputFile:
            for line in inputFile:
                # Split off the terminator first: reversing the raw line
                # would move the newline to the front of the reversed text.
                content = line.rstrip('\r\n')
                terminator = line[len(content):]
                outputFile.write(content[::-1] + terminator)

    # Single-argument call form prints the same text on Python 2 and 3.
    print(file + ' Done!')
    return outputPath
def isValidFile(filePath):
    """Tell whether ``filePath`` is an existing regular file with the allowed suffix.

    Returns True only when the path refers to a file (not a directory or a
    missing entry) and its name ends with ALLOWED_EXTENSION.
    """
    if not os.path.isfile(filePath):
        return False
    return filePath.endswith(ALLOWED_EXTENSION)
def loadFileNames(dir):
    """Collect the names of the valid input files found directly in ``dir``.

    Each directory entry is joined with ``dir`` before being checked with
    isValidFile, but the returned list holds the bare entry names (not the
    joined paths), in the order os.listdir produced them.
    """
    validNames = []
    for entry in os.listdir(dir):
        candidate = os.path.join(dir, entry)
        if isValidFile(candidate):
            validNames.append(entry)
    return validNames
def startWorking():
    """Reverse every valid text file in the working directory in parallel.

    Lists the valid input files in the current working directory, makes sure
    OUTPUT_DIR exists, then maps processFile over the file names using a pool
    of NUM_PROCESSES workers. The list of output paths returned by the
    workers is printed once all files have been handled.

    The pool is always closed and joined, even when a worker raises, so no
    worker processes are left behind.
    """
    # Get a list of valid filenames in the current directory
    fileNames = loadFileNames(os.getcwd())
    # Single-argument call form prints the same text on Python 2 and 3.
    print('About to process ' + str(len(fileNames)) + ' files.')

    # Make sure the output directory exists
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    # Set up a processing pool
    pool = Pool(processes=NUM_PROCESSES)
    try:
        # Send the input data to the processing pool
        result = pool.map(processFile, fileNames)
    finally:
        # Release the workers whether or not the map succeeded.
        pool.close()
        pool.join()

    print(result)
# Run the job only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    startWorking()
@CGCooke
Copy link

CGCooke commented Aug 18, 2014

Thanks Dave!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment