Created
September 5, 2021 08:57
-
-
Save Valdes-Tresanco-MS/13eeb9c11b6dc61b1d6d293f08a398ea to your computer and use it in GitHub Desktop.
Converts a pandas Multiindex to a nested dict
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Union  # not needed on Python 3.10+, where a union can be written as "type | type"

import pandas as pd
def multiindex2dict(p: Union[pd.MultiIndex, dict]) -> dict:
    """
    Convert a pandas MultiIndex into a nested dict.

    The hierarchy is folded from the innermost level outwards: on every pass
    the last element of each key tuple is moved into the value and the
    remaining prefix becomes the new key. Folding stops once the keys have
    been reduced to single labels.

    :param p: A ``pd.MultiIndex``, or a dict keyed by tuples (the intermediate
        form built between passes). In a dict, keys that are not tuples are
        ignored.
    :return: A nested dict with one nesting level per index level. For a
        MultiIndex the innermost values are lists holding the last-level
        labels in order of appearance (duplicates are kept). An empty input,
        or a dict without any tuple key, yields ``{}``.
    :raises IndexError: If a key has no element left to split off, which
        happens for a single-level MultiIndex or an empty tuple key.
    """
    current = p
    while True:
        # Only the very first pass can see a MultiIndex; every later pass works
        # on the tuple-keyed dict produced by the previous one.
        from_index = isinstance(current, pd.MultiIndex)
        internal_dict = {}
        end = False
        for x in current:
            if not from_index and not isinstance(x, tuple):
                continue
            t, last = x[:-1], x[-1]
            # A one-element prefix is the outermost level: use the bare label
            # as key instead of a 1-tuple and remember that folding is done.
            if len(t) == 1:
                t = t[0]
                end = True
            if from_index:
                # Lowest level: collect the last-level labels under their prefix.
                internal_dict.setdefault(t, []).append(last)
            else:
                # Upper levels: nest the already-built value one level deeper.
                internal_dict.setdefault(t, {})[last] = current[x]
        # Uncomment this line to know how the dictionary is generated starting from the lowest level
        # print(internal_dict)
        # An empty result means there was nothing (left) to fold; returning here
        # prevents the endless re-processing of an empty dict.
        if end or not internal_dict:
            return internal_dict
        current = internal_dict
########################################################################## | |
# | |
# IF YOU HAVE ANY SUGGESTIONS OR QUESTIONS, YOU CAN LEAVE A COMMENT | |
# | |
########################################################################## | |
##### Examples ##### | |
# Example input: four parallel arrays, one per index level; position i across
# the arrays forms the i-th index tuple.
arrays = [[1, 1, 1, 2, 2, 2],
          ['red', 'red', 'red', 'blue', 'blue', 'blue'],
          [3, 3, 3, 4, 4, 4],
          ['black', 'black', 'black', 'white', 'white', 'white']]
p = pd.MultiIndex.from_arrays(arrays)
# Expected | |
# print(p) | |
# MultiIndex([(1, 'red', 3, 'black'), | |
# (1, 'red', 3, 'black'), | |
# (1, 'red', 3, 'black'), | |
# (2, 'blue', 4, 'white'), | |
# (2, 'blue', 4, 'white'), | |
# (2, 'blue', 4, 'white')], | |
# ) | |
# Fold the example MultiIndex into its nested-dict form.
new_dict = multiindex2dict(p)
# Expected | |
# print(new_dict) | |
# {1: {'red': {3: ['black', 'black', 'black']}}, 2: {'blue': {4: ['white', 'white', 'white']}}} | |
# | |
# If we uncomment print(internal_dict) in the multiindex2dict function | |
# Iteration | dict result | |
# ------------|------------------------------------------------------------------------------- | |
# 1 | {(1, 'red', 3): ['black', 'black', 'black'], (2, 'blue', 4): ['white', 'white', 'white']} | |
# 2 | {(1, 'red'): {3: ['black', 'black', 'black']}, (2, 'blue'): {4: ['white', 'white', 'white']}} | |
# 3 | {1: {'red': {3: ['black', 'black', 'black']}}, 2: {'blue': {4: ['white', 'white', 'white']}}} | |
# This is a more "complex" MultiIndex, and the function works very well on it too:
# MultiIndex([('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'eel'), | |
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'int'), | |
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'pol'), | |
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'sas'), | |
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'tot'), | |
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'vdw'), | |
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'eel'), | |
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'int'), | |
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'pol'), | |
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'sas'), | |
# ... | |
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'pol'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'sas'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'tot'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'eel'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'int'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'pol'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'sas'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'tot'), | |
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'vdw')], | |
# length=180) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment