Skip to content

Instantly share code, notes, and snippets.

@kelvingakuo
Last active March 27, 2022 09:08
Show Gist options
  • Save kelvingakuo/76103f54d1bbf0bf69c92adcd2bacd06 to your computer and use it in GitHub Desktop.
Save kelvingakuo/76103f54d1bbf0bf69c92adcd2bacd06 to your computer and use it in GitHub Desktop.
def nested_loop_join(left: List[dict], right: List[dict], on: str, how: str) -> List[dict]:
""" Nested loop JOIN strategy: For every row in outer, find matching one in inner
If left/right JOIN and there's no match on inner, return outer. Else, skip
Params:
left (list<dict>) - List of dicts representing one relation
right (list<dict>) - List of dicts representing the other relation
on (string) - The key in the relations to JOIN on
how (string) - The JOIN (left, right, inner)
Returns:
ouput (list<dict>) - A list of dicts that match the JOIN conditions
"""
output = []
outer = left if how == "left" else right # If LEFT JOIN, the outer table is the left
inner = right if how == "left" else left # If LEFT JOIN, the inner table is the right
outer_cols = list(outer[0].keys())
inner_cols = list(inner[0].keys())
all_cols = list(set(inner_cols + outer_cols)) # List of all the possible keys (columns)
for row_out in outer: # For each row of the outer table
match = 0
attr_out = row_out[on]
for row_in in inner: # Check each row of the inner table
attr_in = row_in[on]
if(attr_in == attr_out): # If matching, we need to return all the fields in both relations
row_ret = dict(list(row_out.items()) + list(row_in.items()))
output.append(row_ret)
match = match + 1
if(match == 0): # If no match, for LEFT and RIGHT JOINs, we need to return the fields in the outer table
if(how == "left" or how == "right"):
row_ret = {key : row_out[key] if key in row_out else None for key in all_cols}
output.append(row_ret)
return output
if __name__ == "__main__":
tbl1 = [
{"id": 1, "tbl1_field": "A"},
{"id": 2, "tbl1_field": "AB"},
{"id": 5, "tbl1_field": "AE"},
{"id": 3, "tbl1_field": "AC"},
{"id": 4, "tbl1_field": "AD"}
]
tbl2 = [
{"id": 1, "tbl2_field": "B"},
{"id": 1, "tbl2_field": "BB"},
{"id": 2, "tbl2_field": "BC"},
{"id": 6, "tbl2_field": "BD"},
{"id": 7, "tbl2_field": "BE"}
]
# SELECT * FROM tb1 LEFT JOIN tbl2 ON tbl1.id = tbl2.id
l_out = nested_loop_join(tbl1, tbl2, "id", "left")
# SELECT * FROM tbl2 RIGHT JOIN tbl1 ON tbl2.id = tbl1.id
r_out = nested_loop_join(tbl2, tbl1, "id", "right")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment