Last active
September 9, 2019 15:56
-
-
Save mkolod/a87e44d7ed7d236b2de2c9528096c7da to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import hashlib" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class BloomFilter:\n", | |
" \n", | |
" def __init__(self, num_bits, num_hashes):\n", | |
" self.num_bits = num_bits\n", | |
" self.num_hashes = num_hashes\n", | |
" self.bit_vec = [0] * num_bits\n", | |
" \n", | |
" def generate_hash_for_item(self, item, hash_id):\n", | |
" assert type(item) == str, \"Only supporting strings at the moment\"\n", | |
" m = hashlib.sha256()\n", | |
" m.update(bytes((item + str(hash_id)).encode('utf-8'))) \n", | |
" return int(m.hexdigest(), 16) \n", | |
" \n", | |
" def add_item(self, item):\n", | |
" for hash_id in range(self.num_hashes):\n", | |
" bit_to_set = self.generate_hash_for_item(item, hash_id) % self.num_bits\n", | |
" self.bit_vec[bit_to_set] = 1\n", | |
" \n", | |
" def check_item(self, item):\n", | |
" assert type(item) == str, \"Only supporting strings at the moment\"\n", | |
" for hash_id in range(self.num_hashes):\n", | |
" bit_to_check = self.generate_hash_for_item(item, hash_id) % self.num_bits\n", | |
" if self.bit_vec[bit_to_check] == 0:\n", | |
" return False\n", | |
" return True\n", | |
" \n", | |
" \n", | |
" \n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"True\n", | |
"False\n" | |
] | |
} | |
], | |
"source": [ | |
"b = BloomFilter(128, 10)\n", | |
"b.add_item(\"hello\")\n", | |
"print(b.check_item(\"hello\"))\n", | |
"print(b.check_item(\"world\"))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment