Skip to content

Instantly share code, notes, and snippets.

@ulgens
Last active February 2, 2019 01:11
Show Gist options
Save ulgens/bc7732ad1a454291063c08bebebb460b to your computer and use it in GitHub Desktop.
Humble Bundle Book Download
# pip install wget
# Save the Humble Bundle purchase/download page to index.html first, then
# run this script: it walks the saved page and downloads the CBZ edition
# of every book listed.
from lxml import html
import wget

# XPath to the container holding one <div> per book row on the saved page.
book_list_xpath = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div"""
# XPath templates for one row's title text and download hrefs; {index} is a
# 1-based XPath position.
book_name_xpath = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div[{index}]/div/div[2]/div[1]/a/text()"""
download_links_path = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div[{index}]/div/div[3]/div/div/div/div[1]/a/@href"""

with open("index.html") as file:
    content = file.read()

tree = html.fromstring(content)
books = tree.xpath(book_list_xpath)

# XPath positions are 1-based, so enumerate must start at 1: with the
# original 0-based loop, div[0] never matched (index 0 was misreported as
# "garbage") and the last book's index was never reached, so it was
# silently skipped.
for index, book in enumerate(books, start=1):
    names = book.xpath(book_name_xpath.format(index=index))
    # Rows without a title link are garbage (header, download-all button etc.)
    if not names:
        continue
    name = names[0].strip()

    download_links = book.xpath(download_links_path.format(index=index))
    cbz_links = [link for link in download_links if "cbz" in link]
    if not cbz_links:
        print(f"Couldn't find CBZ for {name}")
        continue

    print(f"{index}/{len(books)} -> {name}")
    # NOTE(review): `name` may contain characters like '#' or ':' — fine on
    # Linux/macOS, but would need sanitizing on Windows filesystems.
    wget.download(cbz_links[0], f"{name}.cbz")
    print("\n")
# 6/80 -> Wynonna Earp Legends: Doc Holliday #1
# 100% [........................................................................] 14278645 / 14278645
# 7/80 -> Wynonna Earp Legends: Doc Holliday #2
# 100% [........................................................................] 16174759 / 16174759
# 8/80 -> The Last Fall
# 100% [........................................................................] 69379908 / 69379908
# 9/80 -> Comic Book History of Comics
# 9% [...... ] 14303232 / 148509839
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment