Skip to content

Instantly share code, notes, and snippets.

@philiptzou
Last active November 2, 2015 01:19
Show Gist options
  • Save philiptzou/811ba8678c7a735b6953 to your computer and use it in GitHub Desktop.
Save philiptzou/811ba8678c7a735b6953 to your computer and use it in GitHub Desktop.
Inheritance structure of the various "LT" objects (https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py)
# -*- coding: utf-8 -*-
from inspect import isclass
from pdfminer import layout
class _LTNode(object):
__singletons = {}
def __new__(cls, lt_class):
if lt_class in cls.__singletons:
return cls.__singletons[lt_class]
self = super(_LTNode, cls).__new__(cls)
self.lt_class = lt_class
self._subs = []
cls.__singletons[lt_class] = self
for parent_lt_class in self.lt_class.__bases__:
if parent_lt_class is object:
continue
parent_node = _LTNode(parent_lt_class)
parent_node.append(self)
return self
def is_top(self):
return self.lt_class.__bases__ == (object,)
def append(self, node):
self._subs.append(node)
def print_tree(self, offset_string=''):
lt_name = self.lt_class.__name__
lt_name_len = len(lt_name)
offset_string += (' ' * (lt_name_len / 2))
print lt_name
subs_len = len(self._subs)
for idx, sub in enumerate(self._subs):
print offset_string + '|'
print offset_string + '|'
print offset_string + '+---',
sub.print_tree(offset_string + (' '
if idx + 1 == subs_len
else '| '))
def main():
    """Print the inheritance tree of every ``LT*`` class found in ``layout``.

    All nodes are created before anything is printed, so each top-level
    class (one whose only base is ``object``) already knows its complete
    set of subclasses when its tree is rendered.
    """
    top_nodes = []
    for key in dir(layout):
        if not key.startswith('LT'):
            continue
        lt_class = getattr(layout, key)
        if not isclass(lt_class):
            continue
        node = _LTNode(lt_class)
        if node.is_top():
            top_nodes.append(node)
    for top_node in top_nodes:
        top_node.print_tree()
        # Two blank lines between trees. A bare `print` is a no-op expression
        # on Python 3, so pass an explicit empty string instead.
        # NOTE(review): assumes the blank lines follow each tree rather than
        # only the last one -- confirm against the original layout.
        print('')
        print('')


if __name__ == '__main__':
    main()
LTItem
|
|
+--- LTAnno
|
|
+--- LTComponent
|
|
+--- LTChar
|
|
+--- LTContainer
| |
| |
| +--- LTExpandableContainer
| | |
| | |
| | +--- LTTextContainer
| | |
| | |
| | +--- LTTextBox
| | | |
| | | |
| | | +--- LTTextBoxHorizontal
| | | |
| | | |
| | | +--- LTTextBoxVertical
| | |
| | |
| | +--- LTTextGroup
| | | |
| | | |
| | | +--- LTTextGroupLRTB
| | | |
| | | |
| | | +--- LTTextGroupTBRL
| | |
| | |
| | +--- LTTextLine
| | |
| | |
| | +--- LTTextLineHorizontal
| | |
| | |
| | +--- LTTextLineVertical
| |
| |
| +--- LTLayoutContainer
| |
| |
| +--- LTFigure
| |
| |
| +--- LTPage
|
|
+--- LTCurve
| |
| |
| +--- LTLine
| |
| |
| +--- LTRect
|
|
+--- LTImage
LTText
|
|
+--- LTAnno
|
|
+--- LTChar
|
|
+--- LTTextContainer
|
|
+--- LTTextBox
| |
| |
| +--- LTTextBoxHorizontal
| |
| |
| +--- LTTextBoxVertical
|
|
+--- LTTextGroup
| |
| |
| +--- LTTextGroupLRTB
| |
| |
| +--- LTTextGroupTBRL
|
|
+--- LTTextLine
|
|
+--- LTTextLineHorizontal
|
|
+--- LTTextLineVertical
@philiptzou
Copy link
Author

Well, there are some multiple-inheritance cases that this simple script cannot handle properly. For example, LTAnno inherits from both LTText and LTItem, and LTChar inherits from both LTComponent and LTText. There is also an MRO (method resolution order) behind the inheritance.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment