Created
March 5, 2013 20:13
-
-
Save shriphani/5093858 to your computer and use it in GitHub Desktop.
Create condor_run strings for running jobs in parallel over a list of files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Given a large list of files and the number of processes,
this script divides up the work, writes one file list per process,
prints a condor_run command line for each file list and exits.
"""
import argparse | |
import errno | |
import math | |
import os | |
class OutputDirectoryExistsError(Exception):
    """Raised when the requested output directory already exists."""
def chunk(l, n):
    """Split the sequence ``l`` into at most ``n`` consecutive slices.

    Every slice holds ``ceil(len(l) / n)`` items, except the last one, which
    holds whatever remains. Fewer than ``n`` slices are returned when the
    items run out early (e.g. 5 items with n=4 gives slices of 2, 2 and 1).

    Args:
        l: A sliceable sequence with a length (list, tuple, string, ...).
        n: The maximum number of slices to produce; must be at least 1.

    Returns:
        A list of slices of ``l``; an empty list when ``l`` is empty.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer')
    total = len(l)
    if total == 0:
        # A chunk size of 0 would be an illegal step for range().
        return []
    # float() keeps the division exact on Python 2 as well.
    size = int(math.ceil(float(total) / n))
    return [l[start:start + size] for start in range(0, total, size)]
def build_condor_run_string(job_name, output_directory, command_str, my_job_id, file_list):
    """Write one job's file list to disk and print its condor_run command.

    The file list is written to ``<output_directory>/<job_name>_<my_job_id>.txt``
    with one entry per line. Every ``{}`` in ``command_str`` is replaced by the
    path of that file, and the resulting ``condor_run "..." &`` line is printed
    to stdout.

    Args:
        job_name: Prefix used for the name of the file-list file.
        output_directory: Directory the file-list file is written into.
        command_str: Shell command; ``{}`` marks where the file-list path goes.
        my_job_id: Identifier of this job, appended to the file name.
        file_list: Iterable of file names (strings) assigned to this job.

    Returns:
        The command line that was printed.
    """
    job_files_list_name = job_name + '_' + str(my_job_id) + '.txt'
    job_files_list_path = os.path.join(output_directory, job_files_list_name)

    with open(job_files_list_path, 'w') as file_list_handle:
        file_list_handle.writelines(file_name + '\n' for file_name in file_list)

    # NOTE: command_str is embedded between double quotes without escaping,
    # so a command that itself contains double quotes yields a broken line.
    command = 'condor_run "' + command_str.replace('{}', job_files_list_path) + '" &'

    # A single parenthesised argument prints identically on Python 2 and 3.
    print(command)
    return command
if __name__ == '__main__':

    def parse_cmdline_args():
        """Parse the command line and return the populated namespace.

        Exits with a usage error when num-procs is smaller than 1.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            'job_name',
            metavar='job-name',
            help='We use this as the name of the process',
        )
        parser.add_argument(
            'list_of_files',
            metavar='list-of-files',
            help='List of files to use',
        )
        parser.add_argument(
            'num_procs',
            metavar='num-procs',
            help='Number of processes we are using',
            type=int,
        )
        # Every '{}' in the command is later replaced by the path of the
        # per-job file list.
        parser.add_argument(
            'command_str',
            metavar='command-str',
            help='A shell command pls.',
        )
        parser.add_argument(
            '--output-directory',
            dest='output_directory',
            help='Where to dump the files',
            default='.',
        )
        args = parser.parse_args()
        if args.num_procs < 1:
            # Reject this before anything is created on disk.
            parser.error('num-procs must be a positive integer')
        return args

    def safe_mkdir(path):
        """Create ``path`` (including parents), refusing to reuse an existing one.

        Raises:
            OutputDirectoryExistsError: If ``path`` already exists.
            OSError: For any other failure reported by ``os.makedirs``.
        """
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise
            raise OutputDirectoryExistsError('Output directory must not already exist')

    parsed = parse_cmdline_args()

    # The current directory is the default and obviously exists already.
    if parsed.output_directory != '.':
        safe_mkdir(parsed.output_directory)

    with open(parsed.list_of_files, 'r') as file_list_handle:
        stripped_lines = (line.strip() for line in file_list_handle)
        # Build a real list (chunk() needs len()) and skip blank lines, which
        # would otherwise turn into empty file names.
        file_names = [name for name in stripped_lines if name]

    # Nothing to distribute when the list is empty.
    chunked_file_list = chunk(file_names, parsed.num_procs) if file_names else []

    for job_id, job_files in enumerate(chunked_file_list):
        build_condor_run_string(
            parsed.job_name,
            parsed.output_directory,
            parsed.command_str,
            job_id,
            job_files,
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment