Skip to content

Instantly share code, notes, and snippets.

@guyskk
Created October 3, 2020 04:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guyskk/ebcdfeae94b2c4ecab400719cd9a1899 to your computer and use it in GitHub Desktop.
Save guyskk/ebcdfeae94b2c4ecab400719cd9a1899 to your computer and use it in GitHub Desktop.
"""
table:
book_id, tag_id, weight, boom
insert/update:
boom = boomfilter(book_tag_ids)
select:
q = boomfilter(query_tag_ids)
select * from table
where (tag_id in query_tag_ids) and (boom & q = q)
"""
def fnv(n, p):
# http://isthe.com/chongo/tech/comp/fnv/
p = (p * 16777619) & 0xffffffff
return (n ^ p) * p
def boomfilter(numbers):
# https://llimllib.github.io/bloomfilter-tutorial/zh_CN/
m = 64
# k个随机质数, k=(m/n)ln(2), n=平均numbers个数
p_s = [389749, 834913, 271129, 691693, 964009, 963481]
boom = 0
for n in numbers:
for p in p_s:
i = fnv(n, p) % m
boom = boom | (1 << i)
return boom
def is_match(boom, numbers):
# https://stackoverflow.com/questions/360844/mysql-bitwise-operations-bloom-filter
q = boomfilter(numbers)
return (boom & q) == q
def main():
import itertools
import random
tags = [random.randint(0, 2**16 - 1) for _ in range(8)]
not_tags = [random.randint(0, 2**16 - 1) for _ in range(8)]
boom = boomfilter(tags)
print('{:b}'.format(boom))
for t in tags:
match = is_match(boom, [t])
print(f'tag {t} match={match}')
for t_s in itertools.combinations(tags, 2):
match = is_match(boom, t_s)
print(f'tag {t_s} match={match}')
for t in not_tags:
match = is_match(boom, [t])
print(f'not tag {t} match={match}')
for t_s in itertools.combinations(not_tags, 2):
match = is_match(boom, t_s)
print(f'not tag {t_s} match={match}')
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment