Skip to content

Instantly share code, notes, and snippets.

@jeffmylife
Created March 16, 2021 23:51
Show Gist options
  • Save jeffmylife/4f605de99d30f4c401d0e9640b6e2108 to your computer and use it in GitHub Desktop.
Save jeffmylife/4f605de99d30f4c401d0e9640b6e2108 to your computer and use it in GitHub Desktop.
Standardizes duplicate IDs that appear under different but equivalent names (aliases) into a single, easy-to-query mapping.
import itertools
from operator import itemgetter
def aliasinate(lst, sep=';') -> dict:
    '''
    Map every alias to the set of aliases it shares a row with.

    Each string in ``lst`` is split on ``sep`` and every piece is
    upper-cased. An alias is then mapped to the union of all rows in which
    it appears (itself included). The relation is NOT transitive: two
    aliases are linked only if they occur together in at least one row.

    Parameters
    ----------
    lst : list
        List of strings containing aliases in any order or combination.
    sep : str, optional
        Separator between aliases inside each string. Defaults to ``';'``.

    Returns
    -------
    mapping : dict(set(str))
        Dictionary with keys being the upper-cased aliases (in sorted
        order) and values being the set of aliases sharing a row with
        the key.

    Examples
    --------
    >>> aliasinate(['a;b;c','d', 'e;c','e'])
    {'A': {'A', 'B', 'C'},
    'B': {'A', 'B', 'C'},
    'C': {'A', 'B', 'C', 'E'},
    'D': {'D'},
    'E': {'C', 'E'}}
    '''
    collected = {}
    for row in lst:
        # Honour the caller-supplied separator instead of a hard-coded ';'.
        row_aliases = [alias.upper() for alias in row.split(sep)]
        for alias in row_aliases:
            # Every alias in the row picks up the whole row as equivalents.
            collected.setdefault(alias, set()).update(row_aliases)
    # Emit keys alphabetically so iteration order is deterministic.
    return {alias: collected[alias] for alias in sorted(collected)}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment