Created
August 26, 2020 00:49
-
-
Save anilpai/48d058d6a1e982d57ab55700c2e440c0 to your computer and use it in GitHub Desktop.
Running median of an array of numbers (streaming data) using 2 heaps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from heapq import heappush,heappop, heapify,_heapify_max | |
arr = [11, 8, 9, 6, 20, 5, 7, 14, 12, 3] | |
# `low` is a max-heap & `high` is a min-heap | |
low = [] | |
high = [] | |
# Init median to zero | |
median = 0 | |
for i in arr: | |
# array value is less than median, add to median | |
# add the value to max-heap & heapify | |
# else add to min-heap & heapify | |
if i < median: | |
# as we don't have inbuilt function called _heappush_max | |
# so use heappush [O(log N) per push] and call _heapify_max [O(N)] | |
heappush(low, i) | |
_heapify_max(low) | |
else: | |
heappush(high, i) | |
# The size of height difference between heaps | |
# is more than 1, then move the element & heapify both | |
if len(low) > len(high)+1: | |
heappush(high, heappop(low)) | |
_heapify_max(low) | |
elif len(high) > len(low) + 1: | |
heappush(low, heappop(high)) | |
_heapify_max(low) | |
# Calculate Median: | |
# If lengths equal then median is average of both heaps | |
# Else median is larger heap element | |
if len(low) == len(high): | |
median = float(low[0] + high[0])/2.0 | |
else: | |
median = float(low[0]) if len(low) > len(high) else float(high[0]) | |
print("Median: " , median) | |
# Time Complexity = O(log N) + O(1) ≈ O(log N). | |
# Space complexity = O(N) linear space to hold input in heap containers. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment