Skip to content

Instantly share code, notes, and snippets.

@briandfoy
Last active January 27, 2019 03:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save briandfoy/b550c4ec70d2382dbcaaec1e6f53f94d to your computer and use it in GitHub Desktop.
Save briandfoy/b550c4ec70d2382dbcaaec1e6f53f94d to your computer and use it in GitHub Desktop.
Merge multiple files
#!/usr/local/bin/python3
import heapq
import os
import sys
class MergeFiles:
    """Merge several sorted integer files into one ascending stream.

    Each file has one integer per line and those numbers are sorted in
    ascending order. There are no comments or blank lines.

    Iterating over the object yields ``[filename, number]`` pairs,
    smallest number first:

        numbers = MergeFiles(
            'odds.txt',
            'evens.txt',
            'repeats.txt',
        )
        for i in numbers:
            print( "%s: %d" % (i[0], i[1]) )
    """

    def __init__(self, *filenames):
        """Create an iterable from a list of filenames.

        Each call to the iterator returns the next smallest number from
        any of the files until it exhausts all of the files.

        A file that is empty (or whose first line is not an integer)
        contributes nothing. If one of the files cannot be opened, the
        error from ``open`` propagates and every file opened so far is
        closed first.
        """
        self.heap = []
        self.count = 0
        try:
            for name in filenames:
                self._push_next(open(name, 'r'), name)
        except Exception:
            # Do not leak the handles that were opened before the failure.
            self.close()
            raise

    def __iter__(self):
        return self

    # the Python 3 version of the method (just next in 2)
    def __next__(self):
        """Return ``[filename, number]`` for the smallest pending number.

        Raises StopIteration once every file has been exhausted.
        """
        try:
            number, _, fh, name = heapq.heappop(self.heap)
        except IndexError:
            # Nothing left in the heap: every file has been consumed.
            raise StopIteration from None
        # Refill the heap from the file that just supplied a value.
        self._push_next(fh, name)
        return [name, number]

    def close(self):
        """Close every file still queued and empty the heap."""
        while self.heap:
            self.heap.pop()[2].close()

    def _push_next(self, fh, name):
        """Queue the next integer read from fh, or close fh if there is none."""
        try:
            number = int(fh.readline().rstrip('\n'))
        except ValueError:
            # End of file (empty string) or a line that is not a number:
            # stop processing this file and release its handle.
            fh.close()
            return
        except Exception:
            # A read error: release the handle, then let the caller see it.
            fh.close()
            raise
        # The count is a tie breaker. Without it, equal numbers would
        # make the heap compare the next tuple items, which are the file
        # handle and the file name.
        heapq.heappush(self.heap, (number, self.count, fh, name))
        self.count += 1
if __name__ == "__main__":
    # Merge the files named on the command line only when run as a
    # script, so importing this module to reuse MergeFiles does not read
    # sys.argv or print anything.
    numbers = MergeFiles(*sys.argv[1:])
    for name, number in numbers:
        # Each item is a [filename, number] pair.
        print("%s: %d" % (name, number))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment