Last active
May 16, 2023 07:22
-
-
Save angely-dev/0729c24618e40876b38c81c52278064f to your computer and use it in GitHub Desktop.
Indented text to tree in Python (n-ary tree)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from json import JSONEncoder | |
# | |
# Node class: it just consists of a name and a list of children. | |
# | |
class Node: | |
def __init__(self, name: str): | |
self.name = name | |
self.children = [] | |
def __str__(self): | |
return self.name | |
def deep_print(self, indent_level: int = 0): | |
print(indent_level * ' ', end='') | |
print(self, f'({len(self.children)} children)') | |
[child.deep_print(indent_level + 1) for child in self.children] | |
# | |
# Node encoder: for JSON serialization (optional). | |
# | |
class NodeEncoder(JSONEncoder): | |
def default(self, obj): | |
if isinstance(obj, Node): | |
return {obj.name: obj.children} | |
return JSONEncoder.default(self, obj) | |
# | |
# Converts an indented text to a tree. | |
# A KeyError may be raised if the text is badly indented. | |
# | |
def text2tree(indented_text: str): | |
tree = Node('tree') | |
last_parent = {0: tree} # last parents encountered by indent level | |
for line in indented_text.splitlines(): | |
node = Node(name=line.lstrip(' ')) | |
indent_level = len(line) - len(node.name) | |
last_parent[indent_level].children.append(node) | |
last_parent[indent_level + 1] = node | |
return tree |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Alternate version
A minimal dict only version (without the
Node
class):This script should run "as is". You can copy-paste it.
Output:
In this version, each line of the text is assumed to be unique per indented block (since it is used as dict key). Therefore,
c
appears only once.I used a similar version in DiffPlus, a module to compute a diff between two indented configs. You may want to take a look.