Skip to content

Instantly share code, notes, and snippets.

@ulgens
Last active February 2, 2019 01:11
Show Gist options
Save ulgens/bc7732ad1a454291063c08bebebb460b to your computer and use it in GitHub Desktop.
Humble Bundle Book Download
# pip install wget
# Save the Humble Bundle purchase/download page to index.html first, then
# run this script: it walks the saved page and downloads the CBZ edition
# of every book listed.
from lxml import html
import wget

# XPath to the container holding one <div> per book row on the saved page.
book_list_xpath = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div"""
# XPath templates for one row's title text and download hrefs; {index} is a
# 1-based XPath position.
book_name_xpath = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div[{index}]/div/div[2]/div[1]/a/text()"""
download_links_path = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div[{index}]/div/div[3]/div/div/div/div[1]/a/@href"""

with open("index.html") as file:
    content = file.read()

tree = html.fromstring(content)
books = tree.xpath(book_list_xpath)

# XPath positions are 1-based, so enumerate must start at 1: with the
# original 0-based loop, div[0] never matched (index 0 was misreported as
# "garbage") and the last book's index was never reached, so it was
# silently skipped.
for index, book in enumerate(books, start=1):
    names = book.xpath(book_name_xpath.format(index=index))
    # Rows without a title link are garbage (header, download-all button etc.)
    if not names:
        continue
    name = names[0].strip()

    download_links = book.xpath(download_links_path.format(index=index))
    cbz_links = [link for link in download_links if "cbz" in link]
    if not cbz_links:
        print(f"Couldn't find CBZ for {name}")
        continue

    print(f"{index}/{len(books)} -> {name}")
    # NOTE(review): `name` may contain characters like '#' or ':' — fine on
    # Linux/macOS, but would need sanitizing on Windows filesystems.
    wget.download(cbz_links[0], f"{name}.cbz")
    print("\n")
# 6/80 -> Wynonna Earp Legends: Doc Holliday #1
# 100% [........................................................................] 14278645 / 14278645
# 7/80 -> Wynonna Earp Legends: Doc Holliday #2
# 100% [........................................................................] 16174759 / 16174759
# 8/80 -> The Last Fall
# 100% [........................................................................] 69379908 / 69379908
# 9/80 -> Comic Book History of Comics
# 9% [...... ] 14303232 / 148509839
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment