Skip to content

Instantly share code, notes, and snippets.

@shreyasms17
Last active May 3, 2021 05:27
Show Gist options
  • Save shreyasms17/789b838691eacf3faec23eaf05797ca4 to your computer and use it in GitHub Desktop.
Save shreyasms17/789b838691eacf3faec23eaf05797ca4 to your computer and use it in GitHub Desktop.
AutoFlatten unnest_dict
def unnest_dict(self, json, cur_path):
    '''
    Description:
    This function walks a JSON schema recursively. Whenever it reaches a leaf
    node (as decided by self.is_leaf) it maps the hierarchical path of that
    field to the field's own name in self.all_fields. Every array column met
    on the way has its path added to self.cols_to_explode.

    :param json: [type: dict or list] metadata about one field, or a list of
                 such field dicts. Any other value that is not a leaf is
                 ignored.
                 NOTE(review): the layout read here ('name', 'type', 'fields',
                 'elementType') looks like a Spark schema serialised to JSON -
                 confirm against the caller.
    :param cur_path: [type: str] hierarchical path to the parent of this field, each parent separated by a '.'

    :return: None, results are written to self.all_fields and self.cols_to_explode
    '''
    if self.is_leaf(json):
        self.all_fields[f"{cur_path}.{json['name']}"] = json['name']
        return

    if isinstance(json, list):
        # A list of sibling fields: they all share the same parent path.
        for field in json:
            self.unnest_dict(field, cur_path)
        return

    if not isinstance(json, dict):
        # Neither a leaf nor a container that can be descended into.
        return

    field_type = json['type']

    if isinstance(field_type, str):
        # Non-leaf node whose type is a plain string; only a non-'struct' type
        # extends the path with this node's name.
        # NOTE(review): the recursion receives the type string itself, which is
        # neither a list nor a dict, so it only does something if is_leaf
        # accepts strings - kept as-is to preserve behaviour.
        if field_type != 'struct':
            cur_path = f"{cur_path}.{json['name']}"
        self.unnest_dict(field_type, cur_path)
        return

    kind = field_type['type']
    if kind == 'array':
        cur_path = f"{cur_path}.{json['name']}"
        element_type = field_type['elementType']
        self.cols_to_explode.add(cur_path)
        if isinstance(element_type, dict) and 'fields' in element_type:
            # Array of structs: descend into the fields of the element.
            self.unnest_dict(element_type['fields'], cur_path)
        else:
            # Array of scalars, or of a dict-described element type that has
            # no 'fields' (e.g. an array of arrays): there is nothing to
            # descend into, so the array column itself is recorded as a leaf
            # instead of failing with a KeyError on 'fields'.
            self.all_fields[cur_path] = json['name']
    elif kind == 'struct':
        cur_path = f"{cur_path}.{json['name']}"
        self.unnest_dict(field_type['fields'], cur_path)
    # Any other complex type that is_leaf did not claim is left untouched.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment