Skip to content

Instantly share code, notes, and snippets.

@louisswarren
Last active July 20, 2019 11:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save louisswarren/34bd94aefcb4f22d0fdddeee5b0a34af to your computer and use it in GitHub Desktop.
Save louisswarren/34bd94aefcb4f22d0fdddeee5b0a34af to your computer and use it in GitHub Desktop.
Min heap using a tape of booleans
# Idea suggested by Michael Cowie
import itertools
class BloomHeap:
def __init__(self, *values):
self.tape = []
if len(values) == 1 and not isinstance(values[0], int):
self += values[0]
elif values:
try:
self.tape = [False for _ in range(max(values) + 1)]
for value in values:
self.tape[value] = True
except TypeError:
raise TypeError("BloomHeap values must be natural numbers")
def __add__(self, other):
return BloomHeap(*self, *other)
def __contains__(self, x):
return self.tape[x] if isinstance(x, int) and x >= 0 else False
def __eq__(self, other):
if not isinstance(other, BloomHeap):
return False
for x, y in itertools.zip_longest(self, other):
# BloomHeap values will never be None, but the fillvalue is None
if x != y:
return False
return True
def __getitem__(self, n):
for x in self:
if n == 0:
return x
n -= 1
raise IndexError("BloomHeap index out of range")
def __iadd__(self, other):
for x in other:
self.add(x)
def __iter__(self):
for i, v in enumerate(self.tape):
if v:
yield i
def __len__(self):
i = 0
for _ in self:
i += 1
return i
def __repr__(self):
return "BloomHeap({})".format(', '.join(map(repr, self)))
def add(self, x):
try:
if x >= len(self.tape):
self.tape += [False for _ in range(x + 1 - len(self.tape))]
self.tape[x] = True
except TypeError:
raise TypeError("BloomHeap values must be natural numbers")
def copy(self):
"""Return a shallow copy of the BloomHeap.
You may want to do this if you have deleted the previous max element."""
return BloomHeap(*self)
def pop(self, n=0):
for i, v in enumerate(self.tape):
if v:
if n == 0:
self.tape[i] = False
return i
else:
n -= 1
raise IndexError("pop index out of range")
def remove(self, x):
if isinstance(x, int) and x >= 0:
self.tape[x] = False
x = BloomHeap(1, 5)
y = BloomHeap([3, 10])
print(x)
print(y)
assert(5 in x)
assert("foo" not in x)
x.add(7)
print(x)
x.remove(7)
print(x)
x = x + y
print(x)
assert(x == BloomHeap(10, 3, 1, 5))
assert(x != BloomHeap(3, 1, 5))
assert(x[1] == 3)
try:
x[4]
assert(False)
except IndexError:
pass
print(len(x))
print(bool(x))
print(x.pop(1))
print(x)
# Benchmark
from time import time
from random import sample
# BloomHeap tape is half-populated
x = sample(range(1000000), 500000)
lt = time()
ls = sorted(x)
ltt = time()
print("List time:", ltt - lt)
bt = time()
bs = iter(BloomHeap(x))
btt = time()
print("Bloom time:", btt - bt)
"""
BloomHeap(1, 5)
BloomHeap(3, 10)
BloomHeap(1, 5, 7)
BloomHeap(1, 5)
BloomHeap(1, 3, 5, 10)
4
True
3
BloomHeap(1, 5, 10)
List time: 0.15860724449157715
Bloom time: 0.27932047843933105
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment