wey-gu/Nebula-Graph-Murmur64A-Hash.md

## Nebula-Graph-Murmur64A-Hash.md

      
    Raw
  

              Nebula-Graph-Murmur64A-Hash.md
            
          
    References:

https://stackoverflow.com/questions/13305290/is-there-a-pure-python-implementation-of-murmurhash
https://github.com/vesoft-inc/nebula/blob/master/src/common/base/MurmurHash2.h
https://github.com/jievince/rdf-converter/blob/master/murmur2.go

def bytes_to_long(bytes):
    """Combine exactly eight byte values into one little-endian integer.

    The item at index 0 supplies the least significant 8 bits and the
    item at index 7 the most significant 8 bits.

    Args:
        bytes: A sized iterable of eight integers (for example a
            ``bytes`` or ``bytearray`` slice).

    Returns:
        The sum of every item shifted left by eight times its index.

    Raises:
        AssertionError: If the input does not hold exactly eight items.
    """
    assert len(bytes) == 8
    total = 0
    for position, octet in enumerate(bytes):
        # Each successive item occupies the next higher 8-bit lane.
        total += octet << (position * 8)
    return total


def murmur64(data, seed=0xc70f6907):
    """Return the MurmurHash64A digest of *data* as a signed 64-bit integer.

    All arithmetic is carried out modulo 2**64 and the final value is
    reinterpreted as a two's-complement signed 64-bit number, so the
    result is the same on every platform (it does not depend on the
    width of the C ``long`` type).

    Args:
        data: The payload to hash. Anything ``bytearray()`` accepts
            (``bytes``, ``bytearray``, an iterable of ints in 0..255, ...).
            A ``str`` is hashed as its UTF-8 encoding.
        seed: Hash seed; only its low 64 bits are used.

    Returns:
        An ``int`` in the range ``[-2**63, 2**63 - 1]``.
    """
    mask = 0xFFFFFFFFFFFFFFFF  # keeps intermediate values within 64 bits
    m = 0xc6a4a7935bd1e995  # multiplication constant
    r = 47  # right-shift amount used by the mixing steps

    if isinstance(data, str):
        data = data.encode("utf-8")
    data_as_bytes = bytearray(data)
    length = len(data_as_bytes)

    h = (seed & mask) ^ ((m * length) & mask)

    # Number of bytes covered by complete 8-byte words (integer arithmetic
    # only, so no float rounding for very long inputs).
    off = length - (length & 7)
    for start in range(0, off, 8):
        # Each word is read little-endian: first byte is least significant.
        k = int.from_bytes(data_as_bytes[start:start + 8], "little")
        k = (k * m) & mask
        k ^= k >> r
        k = (k * m) & mask
        h ^= k
        h = (h * m) & mask

    # The 1..7 trailing bytes are XOR-ed in at bit offsets 0, 8, 16, ...,
    # which is exactly their little-endian integer value.
    tail = data_as_bytes[off:]
    if tail:
        h ^= int.from_bytes(tail, "little")
        h = (h * m) & mask

    # Final mixing.
    h ^= h >> r
    h = (h * m) & mask
    h ^= h >> r

    # Reinterpret the unsigned 64-bit value as signed two's complement.
    if h >= 1 << 63:
        h -= 1 << 64
    return h

# Demo: hash the UTF-8 bytes of a sample string, passing the seed explicitly, and print the result.
print(str(murmur64("to_be_hashed".encode("utf8"), seed=0xc70f6907)))
Verify it against Nebula Graph:

❯ python test.py
-1098333533029391540

❯ nebula-console-3.0 -addr 10.1.1.168 -port 39669 -user root -p nebula -e 'YIELD hash("to_be_hashed");'
(root@nebula) [(none)]> YIELD hash("to_be_hashed");
+----------------------+
| hash("to_be_hashed") |
+----------------------+
| -1098333533029391540 |
+----------------------+
Got 1 rows (time spent 751/5528 us)

Tue, 17 May 2022 15:18:45 CST