Skip to content

Instantly share code, notes, and snippets.

Last active May 8, 2020
What would you like to do?,,_part_1.pdf,
from lxml import html
import requests
# NOTE(review): the URL literal is empty in this copy of the script, and
# requests will reject it -- fill in the page address before running.
page = requests.get("", timeout=30)
# Fail loudly on a 4xx/5xx response instead of scraping an error page and
# overwriting the output files with empty lists.
page.raise_for_status()
# Hand lxml the raw bytes so the parser can apply the encoding declared in the
# document rather than the one requests guessed from the HTTP headers.
tree = html.fromstring(page.content)
def geturls(desc, selector, filename, doc=None):
    """Extract values matching an XPath selector and write them one per line.

    Prints ``"<desc>: <count>"`` and then creates (or truncates) *filename*,
    writing each matched value on its own line with a trailing ``?dl=1``
    removed if present.

    Args:
        desc: Label printed in front of the number of matches.
        selector: XPath expression to evaluate; it is expected to yield
            strings (for example ``@href`` attribute values).
        filename: Path of the text file to write.
        doc: Object exposing ``xpath(selector)``. Defaults to the
            module-level ``tree``.
    """
    if doc is None:
        doc = tree
    suffix = '?dl=1'
    urls = doc.xpath(selector)
    print('%s: %d' % (desc, len(urls)))
    with open(filename, 'w') as f:
        for url in urls:
            # str.rstrip() treats its argument as a set of characters, so
            # rstrip('?dl=1') would also eat trailing 'd', 'l', '1', '=' or
            # '?' belonging to the URL itself. Remove the exact suffix only.
            if url.endswith(suffix):
                url = url[:-len(suffix)]
            f.write('%s\n' % url)
# One entry per output list: (label, XPath selector, output file). Each selector
# takes the href of the 1st/2nd/3rd <a> child of every element whose class
# attribute is exactly "download".
for label, xpath_expr, out_name in (
    ('hdURLs', '//*[@class="download"]/a[1]/@href', 'HD.txt'),
    ('sdURLs', '//*[@class="download"]/a[2]/@href', 'SD.txt'),
    ('pdfURLs', '//*[@class="download"]/a[3]/@href', 'PDF.txt'),
):
    geturls(label, xpath_expr, out_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment