Skip to content

Instantly share code, notes, and snippets.

@lucmann
Created September 11, 2020 09:43
Show Gist options
  • Save lucmann/2ba0570586c96e3864a137c3336b23b4 to your computer and use it in GitHub Desktop.
Save lucmann/2ba0570586c96e3864a137c3336b23b4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import argparse
class LargeFileSplitter:
    """Split a text file into ``n`` consecutive, line-aligned sub files.

    Output names are derived from the input path: a trailing ``.txt`` is
    dropped, then a two-digit zero-padded index and ``.txt`` are appended
    (``big.txt`` -> ``big00.txt``, ``big01.txt``, ...).

    Attributes:
        file: Path of the input file.
        num: Number of sub files to produce.
        sub_files: Output paths, in the order they are written.
    """

    def __init__(self, file, n):
        """Record the input path and precompute the output paths.

        Args:
            file: Path of the file to split.
            n: Number of sub files to produce; must be at least 1.

        Raises:
            ValueError: If ``n`` is less than 1.
        """
        # Reject counts that would otherwise fail later in split() with an
        # unrelated ZeroDivisionError (n == 0) or IndexError (n < 0).
        if n < 1:
            raise ValueError(
                "number of sub files must be at least 1, got %r" % (n,))
        self.file = file
        self.num = n
        stem = file[:-4] if file.endswith('.txt') else file
        self.sub_files = ["%s%02d.txt" % (stem, i) for i in range(n)]

    def split(self):
        """Write the input's lines out across ``self.sub_files``.

        Every sub file except the last receives ``len(lines) // num``
        lines; the last one also receives the remainder. The whole input
        is read into memory before anything is written.
        """
        with open(self.file, 'r') as f:
            lines = f.readlines()

        per_file = len(lines) // self.num
        last = self.num - 1
        for i, path in enumerate(self.sub_files):
            # An open-ended slice on the final piece picks up the lines
            # left over by the integer division.
            end = None if i == last else (i + 1) * per_file
            with open(path, 'w') as sf:
                sf.writelines(lines[i * per_file:end])
def main():
    """Parse FILE and N from the command line, then split FILE into N parts."""
    cli = argparse.ArgumentParser()
    # (dest, metavar, type, help) for each positional argument, in order.
    positionals = (
        ('file', 'FILE', str, 'specify the large file to be split'),
        ('num', 'N', int, 'specify how many sub files to split into'),
    )
    for dest, metavar, kind, text in positionals:
        cli.add_argument(dest, metavar=metavar, type=kind, help=text)
    options = cli.parse_args()

    splitter = LargeFileSplitter(options.file, options.num)
    splitter.split()
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment