import xml.etree, pandas as 🐼, bs4 as 🍲
url=\
https://bl.ocks.org/tonyfast/2947b4bb582e193f5b2a7dbf8b009b62
__import__('requests_cache').install_cache('signal')
import requests
response = requests.get(url)
tree = 🍲.BeautifulSoup(response.content, 'xml')
def ravel(soup, level=0):
if isinstance(soup, 🍲.element.Tag):
yield soup, level
for child in getattr(soup, 'children', []):
yield from ravel(child, level+1)
def tidysoup(soup):
return 🐼.DataFrame(ravel(tree), columns='tag level'.split()).reset_index().set_index('index level'.split()).tag.apply(
lambda x: 🐼.Series({**x.attrs, 'tag': x.name, 'text': x.text})
)