Created
May 10, 2024 11:43
-
-
Save jinkjonks/54354e9515afdedaa6e19bfca8c1e537 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Workaround to use while https://github.com/nlmatics/llmsherpa/issues/79 is being resolved.
""" | |
import logging
from io import StringIO
from typing import Any

from llmsherpa.readers.layout_reader import Block, Document
class DocTree(Document):
    """A ``Document`` whose ``to_html`` walks the whole block tree.

    Instead of delegating to ``Document.to_html``, this subclass visits
    every descendant of ``self.root_node`` and concatenates the HTML that
    each block produces for itself.
    """

    def __init__(self, blocks: dict[str, Any]):
        """Build the document from the parsed layout payload.

        Args:
            blocks: Block data forwarded unchanged to ``Document.__init__``.
        """
        # Kept (rather than inherited) so the ``blocks`` keyword name that
        # existing callers may use continues to work.
        super().__init__(blocks)

    def to_html(self) -> str:
        """Return the concatenated HTML of every block under the root node.

        Blocks are visited depth-first in pre-order: a block's own HTML is
        written before the HTML of its children. The root node itself is not
        rendered, only its descendants.

        When DEBUG logging is enabled for this module, an outline line
        (depth marker, tag, child count, text) is logged for every block.
        Nothing is written to stdout.

        Returns:
            The HTML fragments joined in traversal order, with no wrapper
            element added around them.
        """
        log = logging.getLogger(__name__)
        # Evaluate once: building the outline calls ``to_text()`` on every
        # block, which is wasted work unless the trace is actually wanted.
        trace = log.isEnabledFor(logging.DEBUG)
        stream = StringIO()

        def iter_children(node: Block, level: int) -> None:
            for child in node.children:
                # NOTE(review): this assumes ``Block.to_html()`` with default
                # arguments renders only the block itself; if it also
                # rendered children, the recursion below would duplicate
                # content. Confirm against the llmsherpa version in use.
                stream.write(child.to_html())
                if trace:
                    log.debug(
                        "%s %s (%d) %s",
                        "-" * level,
                        child.tag,
                        len(child.children),
                        child.to_text(),
                    )
                iter_children(child, level + 1)

        iter_children(self.root_node, 0)
        return stream.getvalue()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment