Skip to content

Instantly share code, notes, and snippets.

@neurobashing
Created April 20, 2015 14:23
Show Gist options
  • Save neurobashing/4c5145c5bc36c3acc95e to your computer and use it in GitHub Desktop.
Save neurobashing/4c5145c5bc36c3acc95e to your computer and use it in GitHub Desktop.
# Python has no case/switch statement, so dispatching on the tag is done by hand.
# TODO: this should probably be a depth-first traversal of the element tree.
def walk_tree(self, doc):
    """Send each element in ``self.elements`` to the matching ``doc`` handler.

    Elements are visited in iteration order and dispatched on their ``tag``
    attribute:

    * ``h1``-``h6``  -> ``doc.handle_heading(element)``
    * ``p``          -> ``doc.handle_paragraph(element)``
    * ``table``      -> ``doc.handle_table(element)``
    * ``ol``/``ul``  -> ``doc.handle_lists(element)``
    * ``blockquote`` -> ``doc.handle_blockquote(element)``

    Tags in the ignore set (``br``, ``root``, ``img`` and the table-internal
    tags) are skipped silently. Any other tag is reported on stdout together
    with the element's ``text`` and is otherwise ignored.

    This method only iterates ``self.elements``; it does not recurse into
    children itself.

    Args:
        doc: Object providing the ``handle_*`` methods listed above. A handler
            is looked up only when an element actually needs it.

    Returns:
        None.
    """
    # Tag -> name of the ``doc`` method that handles it. Names (rather than
    # bound methods) keep the attribute lookup lazy, so a ``doc`` that lacks
    # a handler only fails if a matching element is actually encountered.
    handler_names = {}
    for tags, method_name in (
        (('h1', 'h2', 'h3', 'h4', 'h5', 'h6'), 'handle_heading'),
        (('p',), 'handle_paragraph'),
        (('table',), 'handle_table'),
        (('ol', 'ul'), 'handle_lists'),
        (('blockquote',), 'handle_blockquote'),
    ):
        for tag in tags:
            handler_names[tag] = method_name

    # Tags that are deliberately skipped without any output.
    ignored = frozenset(
        ['br', 'root', 'img', 'thead', 'tbody', 'tr', 'td', 'th']
    )

    for page_element in self.elements:
        tag = page_element.tag
        method_name = handler_names.get(tag)
        if method_name is not None:
            getattr(doc, method_name)(page_element)
        elif tag not in ignored:
            # Single-argument print(...) calls produce identical output on
            # Python 2 and Python 3 (the original print statements were a
            # SyntaxError on Python 3).
            print("ARGH UNHANDLED ELEMENT: ")
            print("%s %s" % (tag, page_element.text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment