Created
March 16, 2021 23:51
-
-
Save jeffmylife/4f605de99d30f4c401d0e9640b6e2108 to your computer and use it in GitHub Desktop.
Standardizes duplicate IDs that appear under different but equivalent names (aliases) into a single, easier-to-use mapping.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
from operator import itemgetter | |
def aliasinate(lst, sep=';') -> dict:
    '''
    Map every alias to the set of aliases it shares a row with.

    Each string in ``lst`` is one row of equivalent names joined by ``sep``.
    Aliases are upper-cased before comparison, so ``'a'`` and ``'A'`` are
    treated as the same alias. An alias that appears in several rows is
    mapped to the union of all of those rows.

    The merge is intentionally not transitive: two aliases end up in each
    other's set only if they occur together in at least one row (see ``'A'``
    and ``'E'`` in the example below, which are linked only through ``'C'``).

    Parameters
    ----------
    lst : list
        List of strings containing aliases in any order or combination.
    sep : str, optional
        Separator between aliases within one row; passed to ``str.split``.
        Defaults to ``';'``.

    Returns
    -------
    mapping : dict(set(str))
        Dictionary keyed by upper-cased alias, in sorted key order. Each
        value is the set of every alias (the key itself included) found in
        any row that contains the key.

    Examples
    --------
    >>> aliasinate(['a;b;c','d', 'e;c','e'])
    {'A': {'A', 'B', 'C'},
     'B': {'A', 'B', 'C'},
     'C': {'A', 'B', 'C', 'E'},
     'D': {'D'},
     'E': {'C', 'E'}}
    >>> aliasinate(['a,b', 'b,c'], sep=',')
    {'A': {'A', 'B'},
     'B': {'A', 'B', 'C'},
     'C': {'B', 'C'}}
    '''
    members = {}
    for row in lst:
        # Honour the caller's separator (it used to be hard-coded to ';').
        names = [alias.upper() for alias in row.split(sep)]
        # Every alias in this row gains the whole row as equivalents.
        for alias in names:
            members.setdefault(alias, set()).update(names)

    # Emit keys in sorted order so iteration order stays deterministic.
    return {alias: members[alias] for alias in sorted(members)}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment