Skip to content

Instantly share code, notes, and snippets.

@danielgarciabsb
Created October 5, 2020 22:02
Show Gist options
  • Save danielgarciabsb/c4ad19f51573aaf55c641dca6a39ef33 to your computer and use it in GitHub Desktop.
Save danielgarciabsb/c4ad19f51573aaf55c641dca6a39ef33 to your computer and use it in GitHub Desktop.
import os, math
import fileinput
class FileSplit(object):
    """Split a line-oriented file into several header-prefixed parts.

    The input is cut into ``splitAmount`` consecutive pieces. Every piece
    except the last one ends right after the last newline found in its
    final buffer (nothing is taken from that buffer when it holds no
    newline); the last piece receives whatever remains of the input.
    Piece ``i`` (counting from 1) is written to
    ``<fileName>.<i><OUTPUT_SUFFIX>`` and starts with ``HEADER``.

    Sizes are computed lazily and cached on the instance attributes
    ``fileSize``, ``splitSize``, ``bufferSize`` and ``splitReadAmount``.
    """

    # Upper bound, in bytes, for a single buffered read from the input.
    MAX_BUFFER_SIZE = 102400
    # Bytes written at the beginning of every output part.
    HEADER = b'NSUDF;STATUS\r\n'
    # Suffix appended after the part index in every output file name.
    OUTPUT_SUFFIX = '.CSV'

    def __init__(self, fileName, splitAmount):
        """Remember what to split; no file access happens here.

        Args:
            fileName: path of the file to split.
            splitAmount: number of output parts to produce.
        """
        self.fileName = fileName
        self.splitAmount = splitAmount
        # Lazily computed caches (see the private helpers below).
        self.fileSize = None
        self.splitSize = None
        self.bufferSize = None
        self.splitReadAmount = None

    def __fileSize(self):
        """Return the input size in bytes, measuring it on first use."""
        if self.fileSize is None:
            self.fileSize = os.path.getsize(self.fileName)
        return self.fileSize

    def __splitSize(self):
        """Return the nominal size of one part: ceil(fileSize / splitAmount)."""
        if self.splitSize is None:
            self.splitSize = int(math.ceil(self.__fileSize() / self.splitAmount))
        return self.splitSize

    def __bufferSize(self):
        """Return the read size: the part size capped at MAX_BUFFER_SIZE."""
        if self.bufferSize is None:
            self.bufferSize = min(self.__splitSize(), FileSplit.MAX_BUFFER_SIZE)
        return self.bufferSize

    def __splitReadAmount(self):
        """Return how many whole buffers fit into one part."""
        if self.splitReadAmount is None:
            buffer_size = self.__bufferSize()
            # An empty input yields a zero-byte buffer; do not divide by it.
            if buffer_size:
                self.splitReadAmount = self.__splitSize() // buffer_size
            else:
                self.splitReadAmount = 0
        return self.splitReadAmount

    def process(self):
        """Write every part of the input file to its own output file.

        Raises:
            OSError: if the input cannot be read or an output cannot be
                written.
        """
        part = 1
        # buffering=0 (raw reads) is kept from the original implementation.
        with open(self.fileName, "rb", 0) as source:
            while part <= self.splitAmount:
                buffer_size = self.__bufferSize()
                # All buffers of a part except its final one are copied as-is.
                verbatim_reads = self.__splitReadAmount() - 1
                target_name = (
                    self.fileName + '.' + str(part) + self.OUTPUT_SUFFIX
                )
                with open(target_name, "wb") as target:
                    target.write(self.HEADER)
                    for _ in range(verbatim_reads):
                        target.write(source.read(buffer_size))
                    if part != self.splitAmount:
                        chunk = source.read(buffer_size)
                        # Keep only the bytes up to and including the last
                        # newline so the part ends on a line boundary.
                        cut = chunk.rfind(b'\n') + 1
                        target.write(chunk[:cut])
                        # Rewind by the bytes actually read but not written.
                        # Using len(chunk) rather than the buffer size keeps
                        # a short read near EOF from rewinding too far and
                        # duplicating data in a later part.
                        source.seek(cut - len(chunk), os.SEEK_CUR)
                    else:
                        # The final part takes everything that is left.
                        target.write(source.read())
                part += 1

    def desc(self):
        """Print the computed sizes (labels are in Portuguese)."""
        print('Tamanho total', self.__fileSize(), 'bytes')
        print('Quantidade divisoes', self.splitAmount)
        print('Tamanho divisao', self.__splitSize(), 'bytes')
        print('Tamanho buffer', self.__bufferSize(), 'bytes')
        print('Quantidade leitura por bloco (buffer)', self.__splitReadAmount())
# Append a ";1" status column to every newline-terminated line of NSUDF.TXT,
# editing the file in place; the previous content is kept as NSUDF.TXT.bak.
with fileinput.FileInput("NSUDF.TXT", backup='.bak', inplace=True) as source:
    for record in source:
        # With inplace=True, standard output is redirected into the file.
        # A final line lacking "\n" passes through without the suffix.
        tagged = ';1\n'.join(record.split('\n'))
        print(tagged, end='')

# Report the computed sizes, then cut the tagged file into 20 parts.
split = FileSplit("NSUDF.TXT", 20)
split.desc()
split.process()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment