Skip to content

Instantly share code, notes, and snippets.

@kelvingakuo
Created November 11, 2021 11:48
Show Gist options
  • Save kelvingakuo/778bfda412eef9e8d09be67c6bb58078 to your computer and use it in GitHub Desktop.
Save kelvingakuo/778bfda412eef9e8d09be67c6bb58078 to your computer and use it in GitHub Desktop.
def hash_func(ind: int, mp_size: int):
""" A simple hash function (uses a version of the mid-square method and modulus)
Params:
ind (int) - The index to hash
mp_size (int) - The size of the hash map
Returns:
key (int) - The hashed output
"""
ind_sq = ind ** 2
ls_t = int(str(ind_sq)[-2:])
key = ls_t % mp_size
return key
def inner_hash_join(left: List[dict], right: List[dict], on: str) -> List[dict]:
""" Hash JOIN: Compute hash table using smaller table. Hash outer table and probe hash map
INNER JOIN only implemented
"""
output = []
hash_map = dict()
inner = left if len(left) < len(right) else right
outer = left if len(left) > len(right) else right
# Build
for record in inner:
rec_key = hash_func(record[on], 90)
hash_map[rec_key] = record
# Probe
for record in outer:
rec_key = hash_func(record[on], 90)
mtch = hash_map.get(rec_key, None)
if(mtch is None):
pass
# No match
else:
# Check the records themselves incase collisions
if(record[on] == mtch[on]):
row_ret = dict(list(record.items()) + list(mtch.items()))
output.append(row_ret)
return output
if __name__ == "__main__":
tbl1 = [
{"id": 11, "tbl1_field": "A"},
{"id": 22, "tbl1_field": "AB"},
{"id": 55, "tbl1_field": "AE"},
{"id": 33, "tbl1_field": "AC"},
{"id": 44, "tbl1_field": "AD"}
]
tbl2 = [
{"id": 11, "tbl2_field": "B"},
{"id": 11, "tbl2_field": "BB"},
{"id": 22, "tbl2_field": "BC"},
{"id": 66, "tbl2_field": "BD"},
{"id": 77, "tbl2_field": "BE"},
{"id": 88, "tbl2_field": "BF"}
]
# SELECT * FROM tb1 INNER JOIN tbl2 ON tbl1.id = tbl2.id
l_out = inner_hash_join(tbl1, tbl2, "id")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment