Skip to content

Instantly share code, notes, and snippets.

@tkrs
Last active October 18, 2018 06:48
Show Gist options
  • Save tkrs/97fc0b3cc0d050a283b39dd006c528e5 to your computer and use it in GitHub Desktop.
Save tkrs/97fc0b3cc0d050a283b39dd006c528e5 to your computer and use it in GitHub Desktop.
Splits a file into numbered part files, each holding at most the given number of lines.
import gzip
import sys
from itertools import chain, islice
class FileSpliter:
    """Split a file into numbered part files of at most N lines each.

    The source is read in binary mode, so lines are copied through
    byte-for-byte.  When ``from_file`` ends with ``.gz`` it is read through
    ``gzip`` and every part is written gzip-compressed with an extra ``.gz``
    extension; otherwise plain files are read and written.

    Parts are named ``<to_file_prefix>.<index>`` (``.gz`` appended for gzip
    input), with ``index`` counting up from 0.
    """

    def __init__(self, from_file, to_file_prefix):
        """Remember the source path and the prefix used to name the parts.

        Args:
            from_file: Path of the file to split.
            to_file_prefix: Path prefix of the generated part files.
        """
        self.from_file = from_file
        self.to_file_prefix = to_file_prefix

    def _is_gzip(self):
        """Return True when the source path carries a ``.gz`` extension."""
        return self.from_file.endswith('.gz')

    def _open_from(self):
        """Open the source for binary reading (through gzip for ``.gz``)."""
        opener = gzip.open if self._is_gzip() else open
        return opener(self.from_file, 'rb')

    def _open_to(self, suffix):
        """Open part number ``suffix`` for binary writing.

        The part is gzip-compressed exactly when the source file is.
        """
        path = '%s.%i' % (self.to_file_prefix, suffix)
        if self._is_gzip():
            return gzip.open(path + '.gz', 'wb')
        return open(path, 'wb')

    def _read(self, num_lines):
        """Yield one lazy iterator per chunk of up to ``num_lines`` lines.

        Each chunk reads straight from the open source file, so it should be
        consumed before the next chunk is requested.  Lines of a chunk that
        the caller left unread are skipped, so the following chunk always
        starts on a ``num_lines`` boundary.

        Nothing is yielded for an empty source or when ``num_lines`` is 0.
        """
        with self._open_from() as fpr:
            while True:
                lines = islice(fpr, num_lines)
                # Peek one line: an empty slice means the source is exhausted.
                try:
                    head = next(lines)
                except StopIteration:
                    return
                # chain() binds this chunk's own head/lines immediately, so a
                # chunk never observes values belonging to a later iteration.
                yield chain((head,), lines)
                # Drain whatever the consumer did not read; a no-op when the
                # chunk was fully consumed (as split() always does).
                for _ in lines:
                    pass

    def _write(self, num_files, lines):
        """Write the iterable ``lines`` into part number ``num_files``."""
        with self._open_to(num_files) as fpw:
            fpw.writelines(lines)

    def split(self, num_lines):
        """Write the source out as consecutive parts of ``num_lines`` lines.

        Args:
            num_lines: Maximum number of lines per part; the final part
                holds whatever lines remain.
        """
        for index, chunk in enumerate(self._read(num_lines)):
            self._write(index, chunk)
def main():
    """Command-line entry point: split a file into parts of N lines.

    Reads three positional arguments from ``sys.argv``: the source file, the
    prefix for the part files, and the number of lines per part.  Arguments
    beyond the third are ignored.

    Raises:
        SystemExit: With a usage message when fewer than three arguments
            were given.
        ValueError: When the line count is not an integer.
    """
    # Fail with a readable usage line instead of an argument-unpacking
    # traceback when arguments are missing.
    if len(sys.argv) < 4:
        sys.exit('usage: %s FROM_FILE TO_FILE_PREFIX NUM_LINES' % sys.argv[0])
    from_file, to_file_prefix, num_lines = sys.argv[1:4]
    fs = FileSpliter(from_file, to_file_prefix)
    fs.split(int(num_lines))
# Run the splitter only when executed as a script, so importing this module
# does not trigger a split using the importer's command line.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment