Skip to content

Instantly share code, notes, and snippets.

@ChrisWeiss
Created November 10, 2017 01:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ChrisWeiss/c657a175029caa615c36011fc16b1a2e to your computer and use it in GitHub Desktop.
Save ChrisWeiss/c657a175029caa615c36011fc16b1a2e to your computer and use it in GitHub Desktop.
Script to download books purchased in a Humble Bundle. From http://paste.openstack.org/show/625665/
#!/usr/bin/python3 -ttu
# vim: ai ts=4 sts=4 et sw=4
import os
import sys
import urllib.parse
import bs4
import requests
####################################################
# Usage: use the browser's "Save page as" to store the page listing the
# books as ``index.html`` in the current directory; main() parses that file.
#
# One shared HTTP session, used by download_file() for its HEAD and GET requests.
SESSION = requests.Session()
def main():
    """Download every e-book linked from the saved ``index.html`` page.

    Parses ``index.html`` from the current working directory, hands each
    ``<a href>`` whose URL path ends in ``.pdf``, ``.mobi`` or ``.epub`` to
    ``download_file`` and finally prints how many such links were found.
    """
    # Read raw bytes so BeautifulSoup detects the page's encoding itself
    # instead of depending on the platform's default text encoding.
    with open('index.html', 'rb') as in_file:
        data = in_file.read()
    soup = bs4.BeautifulSoup(data, 'html.parser')

    count = 0
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            # Anchors without an href cannot point at a book.
            continue
        parsed = urllib.parse.urlparse(href)
        # Match on the path only, so query strings do not hide the extension.
        if parsed.path.endswith(('.pdf', '.mobi', '.epub')):
            count += 1
            download_file(href)
    print(count)
def download_file(url):
    """Download ``url`` into the current directory unless already present.

    The local file name is the last component of the URL path. URLs whose
    file name does not end in ``.pdf``, ``.mobi`` or ``.epub`` are ignored.
    If a file of that name already exists and its size equals the
    ``Content-Length`` reported by a HEAD request, the download is skipped.

    Raises whatever ``requests`` raises for a failed request, including
    the error from ``raise_for_status`` on an HTTP error status.
    """
    parsed = urllib.parse.urlparse(url)
    filename = os.path.basename(parsed.path)
    if not filename.endswith(('.pdf', '.mobi', '.epub')):
        return

    if os.path.isfile(filename):
        file_size = os.path.getsize(filename)
        # HEAD does not follow redirects by default; follow them so the
        # Content-Length compared below belongs to the file itself.
        result = SESSION.head(url, allow_redirects=True)
        length = int(result.headers.get('Content-Length', 0))
        if length and length == file_size:
            print("File already exists: {}".format(filename))
            return

    print("Downloading: {}".format(filename))
    # Stream to a temporary name and rename at the end: the body is never
    # held in memory as a whole, and a failed transfer never leaves a
    # truncated file under the final name (a stale .part file is simply
    # overwritten on the next attempt).
    partial_name = filename + '.part'
    with SESSION.get(url, stream=True) as result:
        result.raise_for_status()
        with open(partial_name, 'wb') as out_file:
            for chunk in result.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)
    os.replace(partial_name, filename)
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment