Skip to content

Instantly share code, notes, and snippets.

@bharadwaj6
Created June 3, 2017 06:56
Show Gist options
  • Save bharadwaj6/0a812a55b604105c72857cdddada9e06 to your computer and use it in GitHub Desktop.
Save bharadwaj6/0a812a55b604105c72857cdddada9e06 to your computer and use it in GitHub Desktop.
To calculate running median given a huge list of numbers
from heapq import heappop, heappush
class MinHeap(object):
def __init__(self):
self._items = []
def push(self, item):
heappush(self._items, item)
def peek(self):
return self._items[0]
def pop(self):
return heappop(self._items)
def length(self):
return len(self._items)
def __repr__(self):
return str(self._items)
class MaxHeap(object):
def __init__(self):
self._items = []
def push(self, item):
heappush(self._items, -item)
def peek(self):
return -self._items[0]
def pop(self):
return -heappop(self._items)
def length(self):
return len(self._items)
def __repr__(self):
return str(self._items)
from heaps import MinHeap, MaxHeap
with open('median_stream.txt', 'r') as f:
numbers = [int(line) for line in f.readlines()]
def balance_heaps(left_heap, right_heap):
if left_heap.length() > right_heap.length():
right_heap.push(left_heap.pop())
elif right_heap.length() > left_heap.length():
left_heap.push(right_heap.pop())
return (left_heap, right_heap)
def number_stream():
for n in numbers:
yield n
left_heap = MaxHeap()
right_heap = MinHeap()
ctr = 0
current = None
medians = []
for num in number_stream():
ctr += 1
if ctr == 1:
left_heap.push(num)
medians.append(num)
current = num
continue
if abs(left_heap.length() - right_heap.length()) == 2:
left_heap, right_heap = balance_heaps(left_heap, right_heap)
# change the current to new median
current = left_heap.peek()
if num <= current:
left_heap.push(num)
if num > current:
right_heap.push(num)
if abs(left_heap.length() - right_heap.length()) == 2:
left_heap, right_heap = balance_heaps(left_heap, right_heap)
if ctr % 2 == 0:
current = left_heap.peek()
else:
if right_heap.length() > left_heap.length():
current = right_heap.peek()
else:
current = left_heap.peek()
medians.append(current)
print sum(medians) % 10000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment