Skip to content

Instantly share code, notes, and snippets.

@Valdes-Tresanco-MS
Created September 5, 2021 08:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Valdes-Tresanco-MS/13eeb9c11b6dc61b1d6d293f08a398ea to your computer and use it in GitHub Desktop.
Save Valdes-Tresanco-MS/13eeb9c11b6dc61b1d6d293f08a398ea to your computer and use it in GitHub Desktop.
Converts a pandas MultiIndex to a nested dict
from typing import Union  # on Python 3.10+ this is not needed, since a union can be written as "type | type"

import pandas as pd
def multiindex2dict(p: Union[pd.MultiIndex, dict]) -> dict:
    """
    Converts a pandas MultiIndex to a nested dict

    The nesting is built from the innermost level outwards: each pass strips the last element from every key
    tuple and groups the entries under the remaining prefix, then the function recurses on the regrouped dict
    until the prefix is a single value (the outermost level).

    :param p: The pd.MultiIndex to convert. As this is a recursive function, after the first pass p is the
        intermediate dict (keyed by tuples) produced by the previous pass
    :return: A nested dict with one nesting depth per index level except the last one; the values of the last
        level are collected, in order and including duplicates, in a list under the innermost key. An empty
        input (or a dict without tuple keys) gives an empty dict. The index must have at least two levels: a
        single-level index leaves an empty prefix that cannot be split further and raises IndexError
    """
    # The kind of input does not change while iterating, so it is checked once
    is_index = isinstance(p, pd.MultiIndex)
    internal_dict = {}
    end = False
    for x in p:
        # Dict keys that are not tuples cannot be split into (prefix, last element), so they are skipped
        if not is_index and not isinstance(x, tuple):
            continue
        # Since multi-indexes have a descending hierarchical structure, it is convenient to start from the last
        # element of each tuple. That is, we generate the lower level first and work towards the upper one
        prefix, last = x[:-1], x[-1]
        if len(prefix) == 1:
            # Only the outermost level is left: its single value is used directly as the key, and this pass
            # is the final one
            t = prefix[0]
            end = True
        else:
            t = prefix
        if is_index:
            # First pass: collect the values of the last level in a list under their prefix
            internal_dict.setdefault(t, []).append(last)
        else:
            # Later passes: hang the structure built so far one level deeper
            internal_dict.setdefault(t, {})[last] = p[x]
    # Uncomment this line to know how the dictionary is generated starting from the lowest level
    # print(internal_dict)
    # Nothing was grouped (empty input): stop here, since recursing on an empty dict would never terminate
    if end or not internal_dict:
        return internal_dict
    return multiindex2dict(internal_dict)
##########################################################################
#
# IF YOU HAVE ANY SUGGESTIONS OR QUESTIONS, YOU CAN LEAVE A COMMENT
#
##########################################################################
##### Examples #####
# Each inner list holds the values of one index level; the i-th entries of all lists form the i-th index tuple
arrays = [
    [1] * 3 + [2] * 3,
    ['red'] * 3 + ['blue'] * 3,
    [3] * 3 + [4] * 3,
    ['black'] * 3 + ['white'] * 3,
]
p = pd.MultiIndex.from_arrays(arrays)
# print(p) is expected to show:
# MultiIndex([(1, 'red', 3, 'black'),
#             (1, 'red', 3, 'black'),
#             (1, 'red', 3, 'black'),
#             (2, 'blue', 4, 'white'),
#             (2, 'blue', 4, 'white'),
#             (2, 'blue', 4, 'white')],
#            )
new_dict = multiindex2dict(p)
# print(new_dict) is expected to show:
# {1: {'red': {3: ['black', 'black', 'black']}}, 2: {'blue': {4: ['white', 'white', 'white']}}}
#
# If we uncomment print(internal_dict) in the multiindex2dict function
# Iteration | dict result
# ------------|-------------------------------------------------------------------------------
# 1 | {(1, 'red', 3): ['black', 'black', 'black'], (2, 'blue', 4): ['white', 'white', 'white']}
# 2 | {(1, 'red'): {3: ['black', 'black', 'black']}, (2, 'blue'): {4: ['white', 'white', 'white']}}
# 3 | {1: {'red': {3: ['black', 'black', 'black']}}, 2: {'blue': {4: ['white', 'white', 'white']}}}
# This is a "complex" MultiIndex, and the function works very well on it
# MultiIndex([('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'eel'),
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'int'),
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'pol'),
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'sas'),
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'tot'),
# ('complex', 'BDC', 'C:VAL:33', 'C:VAL:33', 'vdw'),
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'eel'),
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'int'),
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'pol'),
# ('complex', 'BDC', 'C:VAL:33', 'F:DG:4', 'sas'),
# ...
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'pol'),
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'sas'),
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'tot'),
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'),
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'),
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'),
# ( 'delta', 'TDC', 'F:DG:4', 'C:VAL:33', 'vdw'),
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'eel'),
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'int'),
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'pol'),
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'sas'),
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'tot'),
# ( 'delta', 'TDC', 'F:DG:4', 'F:DG:4', 'vdw')],
# length=180)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment