Skip to content

Instantly share code, notes, and snippets.

@rgegriff
Created July 27, 2012 20:44
Show Gist options
  • Save rgegriff/3190390 to your computer and use it in GitHub Desktop.
Save rgegriff/3190390 to your computer and use it in GitHub Desktop.
__author__ = 'george griffin'
import urllib2, os, shutil, string
import xml.etree.cElementTree as ET
# Site root; each book's acquisition href from the feed is appended to this
# to build the download URL.
baseurl = "http://tuebl.com"
# Atom namespace in ElementTree's "{uri}" form; prepended to tag names when
# searching the parsed feed.
xmlns = "{http://www.w3.org/2005/Atom}"
def filenameize(s):
    """Strip *s* down to letters, digits and whitespace.

    Characters are kept in their original order; everything else
    (punctuation, path separators, ...) is dropped. The result is used by
    the script as a directory or file name.
    """
    kept = []
    for ch in s:
        if ch.isalpha() or ch.isdigit() or ch in string.whitespace:
            kept.append(ch)
    return "".join(kept)
def get_next_url(tree):
    """Return the ``href`` of the first element whose ``rel`` is "next".

    Every element produced by ``tree.iter()`` is examined in turn. ``None``
    is returned when no element has ``rel="next"``.
    """
    for node in tree.iter():
        if node.get('rel') != "next":
            continue
        return node.get('href')
    return None
def get_entries(tree):
    """Return the Atom ``entry`` elements that are direct children of *tree*."""
    entry_tag = "%sentry" % xmlns
    return tree.findall(entry_tag)
def get_epub_list(entry_list):
    """Extract download metadata from a sequence of Atom ``entry`` elements.

    For each entry the author name, the title and the ``href`` of the link
    whose ``rel`` is ``http://opds-spec.org/acquisition`` are collected.

    Entries that lack any of the three (missing element, empty text, or a
    link without ``href``) are skipped instead of raising ``AttributeError``,
    so one incomplete entry cannot abort the whole crawl.

    Returns a list of dicts with the keys ``'title'``, ``'author'`` and
    ``'url'``; title and author have surrounding whitespace stripped.
    """
    acquisition_path = xmlns + "link[@rel='http://opds-spec.org/acquisition']"
    booklist = []
    for entry in entry_list:
        # findtext() yields None when the element is absent, hence the
        # ``or ""`` before stripping.
        author = (entry.findtext(xmlns + "author/" + xmlns + "name") or "").strip()
        title = (entry.findtext(xmlns + "title") or "").strip()
        link = entry.find(acquisition_path)
        url = link.get('href') if link is not None else None
        if not (author and title and url):
            continue
        booklist.append({'title': title, "author": author, "url": url})
    return booklist
def _fetch_feed_root(url):
    """Download the Atom feed at *url* and return its parsed root element."""
    response = urllib2.urlopen(url)
    try:
        return ET.parse(response).getroot()
    finally:
        response.close()


def _download_book(book):
    """Save one book as ``<author>/<title>.epub`` below the current directory.

    *book* is a dict with ``'title'``, ``'author'`` and ``'url'`` keys; the
    url is appended to ``baseurl``.
    """
    author_dir = filenameize(book['author'])
    if not os.path.exists(author_dir):
        os.makedirs(author_dir)
    target = os.path.join(author_dir, filenameize(book['title']) + '.epub')
    # Open the connection before creating the file so a failed request does
    # not leave an empty .epub behind.
    response = urllib2.urlopen(baseurl + book['url'])
    try:
        # The payload is binary; text mode would mangle it on platforms that
        # translate newlines.
        with open(target, "wb") as f:
            shutil.copyfileobj(response, f)
    finally:
        response.close()


# Walk the catalogue page by page, following each feed's rel="next" link
# until a page has none.
page_url = "http://www.tuebl.com/catalog/titles?param=letter=all/page=0"
count = 0  # consecutive failed attempts at fetching the current page
while page_url is not None:
    # Single-argument print(...) behaves the same as the print statement.
    print("opening " + page_url)
    try:
        root = _fetch_feed_root(page_url)
    except IOError:
        count += 1
        if count == 3:
            # Without this page there is no "next" link to follow, so stop.
            print("...Well, fuck that page.")
            break
        continue  # retry the same page
    count = 0
    for book in get_epub_list(get_entries(root)):
        print("\tDownloading " + book['title'] + " by " + book['author'])
        try:
            _download_book(book)
        except IOError:
            # One bad download should not cost the rest of the page.
            print("\t...download failed, skipping")
    page_url = get_next_url(root)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment