Skip to content

Instantly share code, notes, and snippets.

What would you like to do? Audiobook Generator
#!/usr/bin/env python2.7
import urllib2
import subprocess
import lxml.cssselect
import lxml.etree
import mutagen
selector = lxml.cssselect.CSSSelector
def get_document(url):
    """Retrieves a URL and parses the response as an HTML document.

    The response body is decoded as ASCII with undecodable bytes ignored,
    so any non-ASCII characters are dropped before parsing.

    Args:
        url: Address handed to ``urllib2.urlopen``.

    Returns:
        Whatever ``lxml.etree.fromstring`` produces for the page when given
        an ``lxml.etree.HTMLParser``.

    Raises:
        Anything ``urllib2.urlopen`` or the lxml parser raises; no error is
        handled here.
    """
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection deterministically instead of relying on
        # garbage collection; dump_story fetches one page per chapter.
        response.close()
    return lxml.etree.fromstring(data.decode("ascii", "ignore"), lxml.etree.HTMLParser(encoding="UTF-8"))
def get_chapters(story_id):
    """Yield a ``(number, title)`` pair for each entry of a story's chapter list.

    The list is read from the first ``<select>`` element whose ``name``
    contains "chapter" on the story's chapter-1 page. Each text fragment is
    split on its first ". "; the part before it is converted with ``int``
    and the part after it is used as the title.

    This is a generator: nothing is printed or downloaded until iteration
    starts.
    """
    print("Retrieving chapter index...")
    # NOTE(review): the URL template has no scheme or host; it looks truncated
    # and should be confirmed against the intended site.
    first_page = get_document("{}/{}/".format(story_id, 1))
    dropdown = selector("select[name*=chapter]")(first_page)[0]
    for entry in lxml.etree.ElementTextIterator(dropdown):
        number, _, chapter_title = entry.partition(". ")
        yield (int(number), chapter_title)
def get_chapter_text(story_id, chapter):
    """Download one chapter and return its text as a single string.

    The text is taken from the first element matching ``#storytext``. The
    element's text fragments are joined with blank lines between them.
    """
    print("Retrieving text of chapter {}/{}".format(story_id, chapter))
    # NOTE(review): the URL template has no scheme or host; it looks truncated
    # and should be confirmed against the intended site.
    page = get_document("{}/{}/".format(story_id, chapter))
    story_body = selector("#storytext")(page)[0]
    # Strip the <i>, <b> and <a> tags before iterating, so those inline
    # elements no longer exist as separate nodes when the text is collected.
    lxml.etree.strip_tags(story_body, "i", "b", "a")
    fragments = lxml.etree.ElementTextIterator(story_body)
    return "\n\n".join(fragments)
def get_text_by_chapter(story_id, chapters):
    """Yields (number, title, body) for each chapter in a story.

    ``chapters`` is an iterable of ``(number, title)`` pairs. Each body is
    the chapter text fetched via ``get_chapter_text``, prefixed with a
    "Chapter N: Title" heading. Chapters are fetched lazily, one per
    iteration step.
    """
    for number, chapter_title in chapters:
        chapter_body = get_chapter_text(story_id, number)
        spoken = "Chapter {}: {}\n\n{}".format(number, chapter_title, chapter_body)
        yield number, chapter_title, spoken
def say(s, *a):
    """Runs the ``say`` command, feeding it ``s`` on standard input.

    Any extra positional arguments are appended to the command line
    unchanged. The call blocks until the process finishes; the exit status
    is not inspected.
    """
    command = ["say"]
    command.extend(a)
    speaker = subprocess.Popen(command, stdin=subprocess.PIPE)
    speaker.communicate(s)
import mutagen.m4a
def dump_story(story_id, story_title="", story_author="", cover=None):
    """Generates a sequence of m4a files for a story.

    For every chapter reported by ``get_chapters`` the chapter text is
    spoken into ``<story_title or story_id>-<NNN>-<chapter title>.m4a`` via
    ``say``, and the resulting file is then tagged with mutagen.

    Args:
        story_id: Identifier substituted into the chapter URLs.
        story_title: Optional title; used as the filename prefix, spoken
            before chapter 1 and stored as the album tag.
        story_author: Optional author; spoken before chapter 1 and stored
            as the artist tag.
        cover: Optional value stored under the ``covr`` tag (the script
            below passes a ``mutagen.m4a.M4ACover``).
    """
    chapters = list(get_chapters(story_id))
    for n, title, text in get_text_by_chapter(story_id, chapters):
        if n == 1:
            # Only the first chapter gets the spoken "<title> / by <author>"
            # preface. The author is prepended first so that the title ends
            # up in front of it.
            if story_author:
                text = "by " + story_author + "\n\n" + text
            if story_title:
                text = story_title + "\n\n" + text
        # A "/" in a scraped chapter title would be treated as a directory
        # separator by ``say -o`` and mutagen, so replace it in the filename
        # only. The title tag below keeps the original text.
        safe_title = title.replace("/", "-")
        filename = "{}-{:03d}-{}.m4a".format(story_title or story_id, n, safe_title)
        print("Writing {}".format(filename))
        say(text, "-o", filename)
        print("Writing meta info")
        info = mutagen.m4a.M4A(filename)
        info["trkn"] = (n, len(chapters))
        info["\xa9nam"] = "Chapter {}: {}".format(n, title)
        if story_title:
            info["\xa9alb"] = story_title
        if story_author:
            info["\xa9ART"] = story_author
        if cover:
            info["covr"] = cover
        # NOTE(review): this comment string ends at "available at" with no
        # URL after it; it looks truncated, so confirm the intended text.
        info["\xa9cmt"] = "Generated using Mac OS X 10.6's Speech Synthesis by a script available at"
        # Assigning tags only changes the in-memory object; without save()
        # none of the metadata above is written to the file.
        info.save()
# The author's own invocation: read the cover image from the working
# directory and generate the audiobook for story 5782108.
with open("cover.jpg", "rb") as cover_file:
    # Read inside ``with`` so the image file handle is closed right away
    # rather than being left open for the whole run.
    cover = mutagen.m4a.M4ACover(cover_file.read(), mutagen.m4a.M4ACover.FORMAT_JPEG)
dump_story(5782108, "Harry Potter and the Methods of Rationality", "Eliezer Yudkowsky", cover)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.