Skip to content

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
import heapq
def _to_seconds(stamp):
    """Convert a dot-separated 'HH.MM.SS' string (e.g. '23.04.05') to seconds."""
    hours, minutes, seconds = (int(part) for part in stamp.split('.')[:3])
    return hours * 60 * 60 + minutes * 60 + seconds


def saveTopK(k, log_fn, output_fn):
    """Write the k longest-running entries of a log file to another file.

    Each non-blank line of ``log_fn`` is split on whitespace; its last two
    fields are read as the start and end time in 'HH.MM.SS' form.  The
    duration is ``end - start`` in seconds.  Only k entries are kept in
    memory at any time, using a min-heap whose root is the shortest of the
    entries kept so far.

    Args:
        k: number of entries to keep.  If ``k <= 0`` nothing is kept and an
            empty output file is written.
        log_fn: path of the log file to read.
        output_fn: path of the file to write; one original entry per line,
            ordered from longest to shortest duration (ties are ordered by
            the entry text, descending).

    Raises:
        IndexError, ValueError: if a non-blank line does not end with two
            parsable 'HH.MM.SS' fields.
    """
    min_heap = []
    with open(log_fn, 'r') as log:
        for line in log:
            entry = line.strip()
            if not entry:
                # Blank lines carry no timestamps; skip them instead of
                # failing on values[-2].
                continue
            values = entry.split()
            duration = _to_seconds(values[-1]) - _to_seconds(values[-2])
            # Tuples compare on the first position first, so the heap is
            # ordered by duration (and by entry text on equal durations).
            item = (duration, entry)
            if len(min_heap) < k:
                heapq.heappush(min_heap, item)
            elif min_heap and duration > min_heap[0][0]:
                # Strictly longer than the shortest kept entry: swap it in.
                # The `min_heap` guard covers k <= 0, where the heap stays
                # empty and min_heap[0] would raise IndexError.
                heapq.heapreplace(min_heap, item)
    with open(output_fn, 'w') as output:
        # The heap never holds more than k items, so a full descending sort
        # yields exactly the k largest.
        for _, entry in sorted(min_heap, reverse=True):
            output.write(entry + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.