Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from bs4 import BeautifulSoup
def max_tree_height(html):
    '''
    Calculate the number of HTML tag levels, iteratively.

    The markup is parsed with BeautifulSoup using the ``html.parser``
    backend. The parsed top-level nodes form level 1, and every node whose
    ``contents`` is non-empty places its children one level deeper. As the
    examples show, text inside a tag therefore counts as a level of its own.

    Args:
        html: Markup to measure. Any falsy value (``''``, ``None``) yields 0.

    Returns:
        The deepest level reached, or 0 when there is nothing to measure.

    >>> max_tree_height('')
    0
    >>> max_tree_height('<div></div>')
    1
    >>> max_tree_height('<div></div><div></div>')
    1
    >>> max_tree_height('<div>foo</div>')
    2
    >>> max_tree_height('<div>foo</div><div>bar</div>')
    2
    >>> max_tree_height('<div class="test">foo</div>')
    2
    >>> max_tree_height('<div><div><div><div>foo</div></div></div></div>')
    5
    '''
    if not html:
        return 0
    soup = BeautifulSoup(html, 'html.parser')
    if not soup.contents:
        return 0

    deepest = level = 1
    siblings = iter(soup.contents)
    # Suspended (iterator, level) pairs for the ancestors of the nodes being
    # walked; this explicit stack is what keeps the traversal iterative.
    parents = []
    while True:
        for node in siblings:
            # Nodes lacking a ``contents`` attribute, or with an empty one,
            # are leaves and do not add a level.
            children = getattr(node, 'contents', None)
            if children:
                # Descend: remember where we were, then restart the scan on
                # the children one level down.
                parents.append((siblings, level))
                siblings = iter(children)
                level += 1
                deepest = max(deepest, level)
                break
        else:
            # Current sibling run is exhausted: resume the parent's iterator
            # where it left off, or finish when nothing is suspended.
            if not parents:
                return deepest
            siblings, level = parents.pop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment