Skip to content

Instantly share code, notes, and snippets.

@angely-dev
Last active May 16, 2023 07:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save angely-dev/0729c24618e40876b38c81c52278064f to your computer and use it in GitHub Desktop.
Save angely-dev/0729c24618e40876b38c81c52278064f to your computer and use it in GitHub Desktop.
Indented text to tree in Python (n-ary tree)
from json import JSONEncoder
#
# Node class: it just consists of a name and a list of children.
#
class Node:
def __init__(self, name: str):
self.name = name
self.children = []
def __str__(self):
return self.name
def deep_print(self, indent_level: int = 0):
print(indent_level * ' ', end='')
print(self, f'({len(self.children)} children)')
[child.deep_print(indent_level + 1) for child in self.children]
#
# Node encoder: for JSON serialization (optional).
#
class NodeEncoder(JSONEncoder):
def default(self, obj):
if isinstance(obj, Node):
return {obj.name: obj.children}
return JSONEncoder.default(self, obj)
#
# Converts an indented text to a tree.
# A KeyError may be raised if the text is badly indented.
#
def text2tree(indented_text: str):
tree = Node('tree')
last_parent = {0: tree} # last parents encountered by indent level
for line in indented_text.splitlines():
node = Node(name=line.lstrip(' '))
indent_level = len(line) - len(node.name)
last_parent[indent_level].children.append(node)
last_parent[indent_level + 1] = node
return tree
@angely-dev
Copy link
Author

Usage

File indented_text.txt:

a
 a
 b
b
 a
  a
  b
 b
c
c
d

Python main.py:

from json import dumps
from tree import text2tree, NodeEncoder

tree = text2tree(open('indented_text.txt').read())
tree.deep_print()
print(dumps(tree, indent=2, cls=NodeEncoder))

Output:

tree (5 children)
 a (2 children)
  a (0 children)
  b (0 children)
 b (2 children)
  a (2 children)
   a (0 children)
   b (0 children)
  b (0 children)
 c (0 children)
 c (0 children)
 d (0 children)
{
  "tree": [
    {
      "a": [
        {
          "a": []
        },
        {
          "b": []
        }
      ]
    },
    {
      "b": [
        {
          "a": [
            {
              "a": []
            },
            {
              "b": []
            }
          ]
        },
        {
          "b": []
        }
      ]
    },
    {
      "c": []
    },
    {
      "c": []
    },
    {
      "d": []
    }
  ]
}

Alternatively, to dump leaves only (without the tree node):

print(dumps(tree.children, indent=2, cls=NodeEncoder))

@angely-dev
Copy link
Author

Alternate version

A minimal dict only version (without the Node class):

from json import dumps

#
# Converts an indented text to a tree.
#
# Each line is assumed to be unique per indented block.
# A KeyError may be raised if the text is badly indented.
#
def text2tree(indented_text: str):
    tree = {}
    last_parent = {0: tree} # last parents encountered by indent level

    for line in indented_text.splitlines():
        child_name = line.lstrip(' ')
        child_level = len(line) - len(child_name)
        last_parent[child_level][child_name] = {}
        last_parent[child_level + 1] = last_parent[child_level][child_name]

    return tree

indented_text = """a
 a
 b
b
 a
  a
  b
 b
c
c
d"""

tree = text2tree(indented_text)
print(dumps(tree, indent=2))

This script should run "as is". You can copy-paste it.

Output:

{
  "a": {
    "a": {},
    "b": {}
  },
  "b": {
    "a": {
      "a": {},
      "b": {}
    },
    "b": {}
  },
  "c": {},
  "d": {}
}

In this version, each line of the text is assumed to be unique per indented block (since it is used as dict key). Therefore, c appears only once.

I used a similar version in DiffPlus, a module to compute a diff between two indented configs. You may want to take a look.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment