Created
December 13, 2017 16:41
-
-
Save dash-docs/5d63f095352a717f90d41a5e7fbbdac1 to your computer and use it in GitHub Desktop.
Dash Bloom Filter Tutorial - example script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from math import log | |
from bitarray import bitarray # from pypi.python.org/pypi/bitarray | |
import pyhash # from https://github.com/flier/pyfasthash | |
# Based on BIP-37 | |
# https://github.com/QuantumExplorer/bips/blob/master/bip-0037.mediawiki | |
# Defined in bloom.h | |
# https://github.com/dashpay/dash/blob/master/src/bloom.h#L17-#L18 | |
MAX_BLOOM_FILTER_SIZE = 36000 | |
MAX_HASH_FUNCS = 50 | |
# Set of flags that control how matched items are added to the filter (per BIP-37) | |
# https://github.com/dashpay/dash/blob/master/src/bloom.h#L24 | |
nFlags = 0 | |
nElements = 1 # Number of elements | |
nFPRate = 0.0001 # False positive rate | |
nFilterBytes = int(min((-1 / log(2)**2 * nElements * log(nFPRate)) / 8, MAX_BLOOM_FILTER_SIZE)) | |
# Calculate the number of hash functions to use in the filter | |
# Limit the maximum number to 50 per BIP-37 | |
nHashFuncs = int(min(nFilterBytes * 8 / nElements * log(2), MAX_HASH_FUNCS)) | |
murmur3 = pyhash.murmur3_32() | |
TEST_TXID = "019f5b01d4195ecbc9398fbf3c3b1fa9bb3183301d7a1fb3bd174fcfa40a2b65" | |
def bloom_hash(nHashNum, data): | |
seed = (nHashNum * 0xfba4c795 + nTweak) & 0xffffffff | |
return( murmur3(data, seed=seed) % (nFilterBytes * 8) ) | |
# Bloom Filter creation | |
def create_filter(nTweak): | |
print('Creating bloom filter') | |
vData = nFilterBytes * 8 * bitarray('0', endian="little") | |
data_to_hash = TEST_TXID | |
data_to_hash = data_to_hash.decode("hex") | |
print('Filter bytes: {}; Hash functions: {}'.format(nFilterBytes, nHashFuncs)) | |
print(" Filter (As Bits)") | |
print("nHashNum nIndex Filter 0123456789abcdef") | |
print("~~~~~~~~ ~~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~") | |
for nHashNum in range(nHashFuncs): | |
nIndex = bloom_hash(nHashNum, data_to_hash) | |
## Set the bit at nIndex to 1 | |
vData[nIndex] = True | |
## Debug: print current state | |
print(' {0:2} {1:2} {2} {3}'.format( | |
nHashNum, | |
hex(int(nIndex)), | |
vData.tobytes().encode("hex"), | |
vData.to01() | |
)) | |
print('Bloom filter: {}\n'.format(vData.tobytes().encode("hex"))) | |
# Bloom Filter evaluation | |
def evaluate_filter(): | |
print('Evaluating bloom filter') | |
vData = bitarray(endian='little') | |
vData.frombytes("b50f".decode("hex")) | |
nHashFuncs = 11 | |
nTweak = 0 | |
nFlags = 0 | |
def contains(nHashFuncs, data_to_hash): | |
for nHashNum in range(nHashFuncs): | |
## bloom_hash as defined in previous section | |
nIndex = bloom_hash(nHashNum, data_to_hash) | |
if vData[nIndex] != True: | |
print("MATCH FAILURE: Index {0} not set in {1}\n".format( | |
hex(int(nIndex)), | |
vData.to01() | |
)) | |
return False | |
print("MATCH SUCCESS\n") | |
## Test 1: Same TXID as previously added to filter | |
data_to_hash = TEST_TXID | |
print('\nChecking: {}'.format(data_to_hash)) | |
data_to_hash = data_to_hash.decode("hex") | |
contains(nHashFuncs, data_to_hash) | |
## Test 2: Arbitrary string | |
data_to_hash = "1/10,000 chance this ASCII string will match" | |
print('Checking: {}'.format(data_to_hash)) | |
contains(nHashFuncs, data_to_hash) | |
# Tweak is a random value added to the seed value in the hash function | |
# used by the bloom filter | |
nTweak = 0 | |
create_filter(nTweak) | |
evaluate_filter() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment