Skip to content

Instantly share code, notes, and snippets.

@brunokim
Created June 9, 2022 13:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brunokim/56da945ed2869e9b07e668d79f7fb805 to your computer and use it in GitHub Desktop.
Save brunokim/56da945ed2869e9b07e668d79f7fb805 to your computer and use it in GitHub Desktop.
def merge_files(filenames):
    """Merge several key/count files into one newly created temporary file.

    Every input is wrapped in a ``FileState`` (defined outside this block).
    On each round the smallest current key among the inputs that are still
    open is selected, the counts of all inputs currently positioned on that
    key are summed, and one ``key<TAB>total`` line is written to the output.

    Args:
        filenames: Iterable of names, each passed to ``FileState``.

    Returns:
        A ``(tempname, num_records)`` tuple: the path of the temporary file
        created by ``tempfile.mkstemp`` and the number of lines written to
        it. The caller owns the file and is responsible for deleting it.

    Raises:
        Whatever ``FileState`` or the file system raises. The temporary file
        is removed before the exception propagates, because the caller never
        learns its name in that case.
    """
    # Create the temporary file safely (no name race).
    fd, tempname = tempfile.mkstemp(text=True)
    num_records = 0
    try:
        # The file object takes ownership of fd. Leaving the `with` block
        # flushes the write buffer and closes the descriptor; closing the raw
        # fd with os.close() instead would drop any still-buffered records.
        with os.fdopen(fd, mode='w') as target:
            # Open all input files.
            # FileState is a wrapper that keeps the last key/value read.
            files = [FileState(filename) for filename in filenames]
            for f in files:
                f.open()

            while files:
                # Read from every open file. Per the original author's note,
                # a file that reaches EOF is closed inside FileState.
                for f in files:
                    f.read()
                files = [f for f in files if not f.is_closed]
                if not files:
                    break

                # Take the smallest key among all files.
                min_key = min(f.key for f in files)
                total = 0
                for f in files:
                    # Several files may hold the same key: sum their values.
                    if f.key == min_key:
                        # Mark this file's record as consumed.
                        # NOTE(review): presumably FileState.read() only
                        # advances when key is None; otherwise files holding
                        # a larger key would lose their pending record on the
                        # next round — confirm against FileState.
                        f.key = None
                        total += f.count

                # Serialize the merged record into the temporary file.
                target.write(f'{min_key}\t{total}\n')
                num_records += 1
    except BaseException:
        # Best-effort cleanup: do not let a failed unlink mask the original
        # error that is being re-raised.
        try:
            os.unlink(tempname)
        except OSError:
            pass
        raise
    return tempname, num_records
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment