Skip to content

Instantly share code, notes, and snippets.

@baurzhan-konurbayev
Last active November 11, 2022 16:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save baurzhan-konurbayev/2044e882d1aca89dd3667008bff033df to your computer and use it in GitHub Desktop.
Save baurzhan-konurbayev/2044e882d1aca89dd3667008bff033df to your computer and use it in GitHub Desktop.
Merge lists with identical IDs
input = [[1,2,3],
[3],
[4],
[5,6],
[1,7],
[2,8],
[4,9]]
"""
Output:
[[1,2,3,7,8],
[4,9],
[5,6]]
"""
def consolidate_records (list_of_lists):
# Fill out auxilary data structures to facilitate with computations
indexes_by_id = {}
ids_by_index = {}
for idx, ids_list in enumerate(list_of_lists):
ids_by_index[idx] = ids_list
for id in ids_list:
if id not in indexes_by_id:
indexes_by_id[id] = {idx}
else:
indexes_by_id[id].add(idx)
print(f"indexes_by_id={indexes_by_id}")
print(f"ids_by_index={ids_by_index}")
# Actual computations
was_updated = False
for id in indexes_by_id:
# id occurs in multiple lists
if len(indexes_by_id[id]) > 1:
was_updated = True
# merge lists with same id
combined_ids_list = []
for index in indexes_by_id[id]:
combined_ids_list += ids_by_index[index]
combined_index = list(indexes_by_id[id])[0]
ids_by_index[combined_index] = sorted(list(set(combined_ids_list)))
for delete_index in indexes_by_id[id]:
if delete_index == combined_index:
continue
for delete_id in ids_by_index[delete_index]:
if delete_id == id:
continue
if combined_index not in indexes_by_id[delete_id]:
indexes_by_id[delete_id].add(combined_index)
indexes_by_id[delete_id].remove(delete_index)
del ids_by_index[delete_index]
indexes_by_id[id] = {combined_index}
result = []
for ids_list in ids_by_index.values():
result.append(ids_list)
if was_updated:
consolidate_records(result)
return result
print( merge_lists(input))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment