Skip to content

Instantly share code, notes, and snippets.

@LinnTroll
Last active Aug 29, 2015
Embed
What would you like to do?
import requests
import lxml.etree
import lxml.html
def get_catalog(url, timeout=30):
    """Download the page at *url* and return its top-level catalog lists.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The list of ``<ul>`` elements that are direct children of a
        ``<section class="catalog">`` element; empty when there are none.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error response instead of silently parsing its body.
    response.raise_for_status()
    # Hand lxml the raw bytes so it can take the encoding from the document
    # itself; response.text relies on requests' guess from the HTTP headers.
    doc = lxml.html.document_fromstring(response.content)
    return doc.xpath('//section[@class="catalog"]/ul')
def parse_ul(listUl, level=0):
    """Print the link texts of nested ``<ul>`` lists as a tab-indented tree.

    Args:
        listUl: Iterable of elements exposing ``xpath()`` (the ``<ul>``
            nodes to walk).
        level: Current nesting depth; each name is prefixed with this many
            tab characters.

    Returns:
        None. The names are written to standard output, one per line.
    """
    indent = level * '\t'
    for elemUL in listUl:
        for elemLI in elemUL.xpath('li'):
            link_text = elemLI.xpath('a/text()')
            # An item without a direct <a> text node has no name to print;
            # skip the label instead of raising IndexError, but still
            # descend so its nested lists are not lost.
            if link_text:
                print(u"%s%s" % (indent, link_text[0].strip()))
            newUL = elemLI.xpath('ul')
            if newUL:
                parse_ul(newUL, level=level + 1)
# Script driver: fetch the sample catalog page and print its category tree.
CATALOG_URL = 'https://dl.dropboxusercontent.com/u/78886431/testparse.html'
listCatalog = get_catalog(CATALOG_URL)
parse_ul(listCatalog)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment