Skip to content

Instantly share code, notes, and snippets.

@kbaesler
Created June 10, 2014 21:59
Show Gist options
  • Save kbaesler/674a99f1d8b3c19c2307 to your computer and use it in GitHub Desktop.
Save kbaesler/674a99f1d8b3c19c2307 to your computer and use it in GitHub Desktop.
Split the file into separate files based on the line count.
import os
import sys
def main():
    """Run the command-line entry point: ``split.py file lines``.

    Reads the input path and the lines-per-file count from ``sys.argv``,
    calls ``split`` and reports how many files were created. Prints the
    usage line and returns when either positional argument is missing.

    Raises:
        ValueError: If the ``lines`` argument is not an integer literal.
    """
    # Both positional arguments are required, so argv must hold at least
    # three entries (script name, file, lines). Checking for only two lets
    # sys.argv[2] raise IndexError when just the file is supplied.
    if len(sys.argv) < 3:
        print('split.py file lines')
        return
    # Named ``path`` rather than ``file`` to avoid shadowing the builtin.
    path = sys.argv[1]
    lines = int(sys.argv[2])
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('Start: Split the %s file into files of %d lines' % (path, lines))
    count = split(path, lines)
    print('Finished: Created %d files' % (count))
def split(filepath, lines):
    """Split a text file into consecutive files of at most ``lines`` lines.

    Output files are written to the same directory as the input and are
    named ``<basename>_<index><ext>``, where ``<index>`` is the zero-based
    number of the first input line stored in that file (0, lines,
    2 * lines, ...). The last file holds the remainder when the input
    length is not a multiple of ``lines``.

    Args:
        filepath: Path of the file to split.
        lines: Maximum number of lines per output file; must be at least 1.

    Returns:
        The number of output files written (0 for an empty input, in which
        case no output file is created).

    Raises:
        ValueError: If ``lines`` is less than 1.
    """
    # Reject a non-positive chunk size up front, before any file is touched;
    # otherwise ``i % lines`` fails with ZeroDivisionError mid-way.
    if lines < 1:
        raise ValueError('lines must be at least 1, got %r' % (lines,))

    path, filename = os.path.split(filepath)
    basename, ext = os.path.splitext(filename)
    count = 0
    # Output files are opened lazily so that an empty input creates nothing
    # and the first file is not opened twice.
    f_out = None
    with open(filepath, 'r') as f_in:
        try:
            for i, line in enumerate(f_in):
                # Every ``lines``-th line starts a new output file, named
                # after the index of that line.
                if i % lines == 0:
                    if f_out is not None:
                        f_out.close()
                    out_name = '%s_%s%s' % (basename, i, ext)
                    f_out = open(os.path.join(path, out_name), 'w')
                    count += 1
                f_out.write(line)
        finally:
            # Close whichever output file is still open, even on error.
            if f_out is not None:
                f_out.close()
    return count
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment