
@QuantVI
Created July 23, 2019 23:20
A text file-splitter. Probably the second file-splitter I wrote on the job.
# Editing to be able to split all files in a directory
import sys
import os # for getting the list of all files in the directory, when we use batch mode
import re # to find file matching a particular handle/pattern
a = sys.argv[0]
print '\n', sys.argv
# has to be defined before we use it below
def fileProcessor(filename):
    working_file = open(filename, 'rb')
    line_count = 0  # renamed from `iter`, which shadows the built-in
    prefix_stub = 'out_'
    suffix_ver = 0
    name_strand = filename[:-4] + '_'  # removes the .txt extension from the filename string
    # part_size is a module-level string parsed from the command line below
    for one_line in working_file:
        # print repr(one_line)
        if line_count != int(part_size):
            fname = prefix_stub + name_strand + str(suffix_ver)
            current_output = open(fname, 'ab')
            # each line in working_file, except maybe the last line, contains \n;
            # for the last line in each part file, we don't want the \n
            if line_count == int(part_size) - 1:  # e.g. line_count = 19 and part_size = 20
                line_count = line_count + 1
                # split() with no args splits into a list on whitespace and drops the \n;
                # we join back to a string using a space
                current_output.write(' '.join(one_line.split()))
            else:
                line_count = line_count + 1
                current_output.write(one_line)
            current_output.close()
        else:
            # line_count doesn't return to 0, because this line counts toward the new part
            line_count = 1
            suffix_ver = suffix_ver + 1
            fname = prefix_stub + name_strand + str(suffix_ver)
            current_output = open(fname, 'ab')
            current_output.write(one_line)
            current_output.close()
    working_file.close()
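The same line-based chunking can be sketched more compactly in modern Python with `itertools.islice`, which pulls a fixed number of lines from an open file without loading it all into memory. This is a re-sketch, not the author's code: the function name `split_by_lines` and the output naming scheme (`prefix + stem + '_' + part index + '.txt'`) are illustrative assumptions.

```python
import itertools
import os

def split_by_lines(filename, part_size, prefix='out_'):
    """Split a text file into parts of at most part_size lines.

    A modern sketch of the gist's fileProcessor; names are hypothetical.
    Returns the list of part filenames written.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    written = []
    with open(filename) as src:
        for index in itertools.count():
            # islice consumes the next part_size lines from the file iterator
            chunk = list(itertools.islice(src, part_size))
            if not chunk:
                break
            out_name = '%s%s_%d.txt' % (prefix, stem, index)
            with open(out_name, 'w') as dst:
                # drop the trailing newline on the part's last line,
                # mirroring the gist's join-on-split trick
                chunk[-1] = chunk[-1].rstrip('\n')
                dst.writelines(chunk)
            written.append(out_name)
    return written
```

Unlike the gist, which reopens each part file in append mode for every line, this version opens each part once and writes the whole chunk, which avoids per-line open/close overhead.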
''' Main area starts here ******************************* '''
if len(sys.argv) != 4:
    print """Wrong number of arguments\n
Note: This program expects a TEXT file with a .txt extension\n
Usage: python quicksplit.py <filename> <option> <length>\n
    filename, the name of a file in this directory, e.g. readme.txt
        batch : use this as the filename to split ALL files in the directory
    option,
        -l : indicates splitting the file based on length counted in lines
             ** currently no other options **
    length, maximum length in lines for each file chunk
Example: python quicksplit.py readme.txt -l 1000\n
         python quicksplit.py batch -l 500\n"""
else:
    # note: part_size will be a string
    filename = sys.argv[1]
    option_letter = sys.argv[2]
    part_size = sys.argv[3]
    if filename == 'batch':
        # os.getcwd() instead of os.environ['PWD'], which isn't set on all platforms
        fileLocation = os.getcwd() + '/'
        directoryFiles = os.listdir(fileLocation)
        lstore = []
        for phile in directoryFiles:
            # anchored pattern; the original '(.*)+\.txt' also matched .txt
            # mid-name and risks catastrophic backtracking
            lstore.append([m.group() for m in re.finditer(r'.*\.txt$', phile)])
        customFiles = []
        # this is to flatten the result list
        for listOfString in lstore:
            if listOfString:
                customFiles.append(listOfString[0])
        print '... Temp list of files to consider \n'
        for f in customFiles:
            if f:
                print f
        print '\n'
        # now we run the splits consecutively
        for f in customFiles:
            fileProcessor(f)
    else:
        fileProcessor(filename)
QuantVI commented Jul 23, 2019

I worked with text and CSV files all the time. They could sometimes be 15 MB or more - not something even advanced text editors like opening. To make working with these files easier, for example

  • to manually inspect for explicit and contextual data issues,
  • to send/upload to different FTP locations,

I wrote my own script to split a file into chunks of however many lines I specified.

The first version required a specific file name. This second version can instead be given the argument "batch", and will split every file in the current directory with a .txt extension. It was a very valuable tool on the occasions when dropped data had to be uploaded again. FTP endpoints, behind load balancers or otherwise, can have issues with larger files. Splitting 100 MB+ of text files into chunks that could be dropped into an FTP without monitoring was a huge time-saver.
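The batch-mode directory scan described above - find every `.txt` file, then split each in turn - can be sketched with `glob` instead of a regex over `os.listdir`. This is an illustrative alternative, not the gist's implementation; the function name `batch_targets` is hypothetical.

```python
import glob
import os

def batch_targets(directory='.'):
    """Return the .txt files that batch mode would split, sorted.

    A sketch of the gist's directory scan using glob pattern matching
    rather than re.finditer over os.listdir.
    """
    # '*.txt' matches only files whose names end in .txt
    return sorted(glob.glob(os.path.join(directory, '*.txt')))
```

Each returned path would then be handed to the splitter, just as the gist loops `fileProcessor(f)` over its `customFiles` list.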
