Last active
August 29, 2015 14:01
-
-
Save treyhunner/21f307b975027be5162d to your computer and use it in GitHub Desktop.
Search for DRM-free audiobooks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""
Search downpour.com and emusic.com for DRM-free audiobooks

Usage::

    ./find_audiobooks.py <title>...

File released to the public domain under CC0 license:
http://creativecommons.org/publicdomain/zero/1.0/deed

Requires purl and beautifulsoup4::

    $ pip install purl beautifulsoup4

"""
from __future__ import unicode_literals | |
import sys | |
from itertools import chain, izip_longest | |
import urllib2 | |
from bs4 import BeautifulSoup | |
from purl import URL | |
def unescape(text):
    """Swap every right single quotation mark (U+2019) for a plain ``'``."""
    smart_apostrophe = '\u2019'
    plain_apostrophe = "'"
    return text.replace(smart_apostrophe, plain_apostrophe)
def get_downpour_url(book_name):
    """Build the downpour.com catalog search URL for ``book_name``.

    The title is passed as the ``q`` query parameter and the finished URL
    is returned as a string.
    """
    search_page = URL("http://www.downpour.com/catalogsearch/result/")
    with_query = search_page.query_param('q', book_name)
    return with_query.as_string()
def get_emusic_url(book_name):
    """Build the emusic.com book search URL for ``book_name``.

    The title is passed as the ``s`` query parameter and the finished URL
    is returned as a string.
    """
    search_page = URL("http://www.emusic.com/search/book/")
    with_query = search_page.query_param('s', book_name)
    return with_query.as_string()
def search_downpour(book_name):
    """Search Downpour and return list of parsed results.

    Fetches the Downpour search page for ``book_name`` and reads every
    ``<li class="item">`` element on it.

    :param book_name: title to search for.
    :returns: list of dicts, in page order, each with ``title``, ``link``
        and ``author`` keys.  ``author`` is an empty string when the item
        has no ``author``-classed node whose text starts with ``By``.
    """
    response = urllib2.urlopen(get_downpour_url(book_name))
    page = BeautifulSoup(response)
    books = page.find_all('li', attrs={'class': "item"})
    results = []
    for book in books:
        header = book.find(attrs={'class': "product-name"})
        link_tag = header.find('a')
        # The title may be split across nested tags; join the pieces and
        # normalise smart apostrophes.
        title = ' '.join(unescape(x)
                         for x in link_tag.stripped_strings)
        link = link_tag['href']
        # Reset for every book: without this, a book lacking a "By ..."
        # node would either raise UnboundLocalError (first book) or
        # silently reuse the previous book's author.
        author = ''
        for node in book.find_all(attrs={'class': 'author'}):
            author_text = node.text
            if author_text.startswith('By'):
                # Drop the leading "By" and surrounding whitespace.
                author = author_text[2:].strip()
        results.append({
            'title': title,
            'link': link,
            'author': author,
        })
    return results
def search_emusic(book_name):
    """Fetch eMusic's book search page for ``book_name`` and parse its hits.

    Each ``<li class="bundle">`` element yields one dict: ``title`` and
    ``link`` come from the anchor inside its ``<h4>``, and ``author`` is
    the text of its ``<h5>``.  Results keep page order.
    """
    search_url = get_emusic_url(book_name)
    soup = BeautifulSoup(urllib2.urlopen(search_url))

    def parse_bundle(bundle):
        """Turn one result element into a title/link/author dict."""
        anchor = bundle.find('h4').find('a')
        byline = bundle.find('h5')
        return {
            'title': anchor.text,
            'link': anchor['href'],
            'author': byline.text,
        }

    return [parse_bundle(bundle)
            for bundle in soup.find_all('li', attrs={'class': "bundle"})]
def print_result(result):
    """Print title, author, and link for audiobook result.

    :param result: mapping with ``title``, ``author`` and ``link`` keys.
    :returns: ``None``; three labelled lines are written to standard output.
    """
    # Parenthesised single-argument form: prints exactly what the bare
    # ``print`` statement does on Python 2, and is also a valid ``print()``
    # call on Python 3 (where the statement form is a SyntaxError).
    print("Title: {}".format(result['title']))
    print("Author: {}".format(result['author']))
    print("Link: {}".format(result['link']))
def merge_lists(*lists):
    """Return merge of lists by alternating elements of each.

    Takes the first element of every list, then the second of every list,
    and so on; lists that run out early are simply skipped from then on.

    :param lists: any number of iterables.
    :returns: a new list with the interleaved elements.
    """
    # Pad with a private sentinel rather than None so that only the padding
    # is stripped; legitimate falsy elements (0, '', {}, None) are kept.
    padding = object()
    interleaved = chain.from_iterable(izip_longest(*lists, fillvalue=padding))
    return [item for item in interleaved if item is not padding]
def main(*args):
    """Look up each title in ``args`` on both sites and print the top hits.

    For every title, the first three Downpour results and the first three
    eMusic results are interleaved and printed one after another.
    """
    per_site_limit = 3
    for title in args:
        downpour_hits = search_downpour(title)
        emusic_hits = search_emusic(title)
        top_hits = merge_lists(downpour_hits[:per_site_limit],
                               emusic_hits[:per_site_limit])
        for hit in top_hits:
            print_result(hit)
if __name__ == "__main__":
    # Everything after the script name is treated as a title to search for.
    cli_titles = sys.argv[1:]
    main(*cli_titles)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment