Created
November 11, 2021 11:48
-
-
Save kelvingakuo/778bfda412eef9e8d09be67c6bb58078 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def hash_func(ind: int, mp_size: int): | |
""" A simple hash function (uses a version of the mid-square method and modulus) | |
Params: | |
ind (int) - The index to hash | |
mp_size (int) - The size of the hash map | |
Returns: | |
key (int) - The hashed output | |
""" | |
ind_sq = ind ** 2 | |
ls_t = int(str(ind_sq)[-2:]) | |
key = ls_t % mp_size | |
return key | |
def inner_hash_join(left: List[dict], right: List[dict], on: str) -> List[dict]: | |
""" Hash JOIN: Compute hash table using smaller table. Hash outer table and probe hash map | |
INNER JOIN only implemented | |
""" | |
output = [] | |
hash_map = dict() | |
inner = left if len(left) < len(right) else right | |
outer = left if len(left) > len(right) else right | |
# Build | |
for record in inner: | |
rec_key = hash_func(record[on], 90) | |
hash_map[rec_key] = record | |
# Probe | |
for record in outer: | |
rec_key = hash_func(record[on], 90) | |
mtch = hash_map.get(rec_key, None) | |
if(mtch is None): | |
pass | |
# No match | |
else: | |
# Check the records themselves incase collisions | |
if(record[on] == mtch[on]): | |
row_ret = dict(list(record.items()) + list(mtch.items())) | |
output.append(row_ret) | |
return output | |
if __name__ == "__main__": | |
tbl1 = [ | |
{"id": 11, "tbl1_field": "A"}, | |
{"id": 22, "tbl1_field": "AB"}, | |
{"id": 55, "tbl1_field": "AE"}, | |
{"id": 33, "tbl1_field": "AC"}, | |
{"id": 44, "tbl1_field": "AD"} | |
] | |
tbl2 = [ | |
{"id": 11, "tbl2_field": "B"}, | |
{"id": 11, "tbl2_field": "BB"}, | |
{"id": 22, "tbl2_field": "BC"}, | |
{"id": 66, "tbl2_field": "BD"}, | |
{"id": 77, "tbl2_field": "BE"}, | |
{"id": 88, "tbl2_field": "BF"} | |
] | |
# SELECT * FROM tb1 INNER JOIN tbl2 ON tbl1.id = tbl2.id | |
l_out = inner_hash_join(tbl1, tbl2, "id") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment