Skip to content

Instantly share code, notes, and snippets.

@jeremyBanks
Created May 15, 2011 14:01
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeremyBanks/973183 to your computer and use it in GitHub Desktop.
Save jeremyBanks/973183 to your computer and use it in GitHub Desktop.
FanFiction.net Audiobook Generator
#!/usr/bin/env python2.7
import urllib2
import subprocess
import lxml.cssselect
import lxml.etree
import mutagen
# Short alias used below as selector(css_expression)(element) to look up
# elements inside a parsed document.
selector = lxml.cssselect.CSSSelector
def get_document(url):
    """Retrieve a URL and parse the response body as an HTML document.

    Args:
        url: Address handed to ``urllib2.urlopen``.

    Returns:
        The result of ``lxml.etree.fromstring`` run with an ``HTMLParser``
        over the downloaded page.
    """
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even when reading fails part-way through.
        response.close()
    # Bytes that are not ASCII are dropped before parsing, so everything
    # extracted from the document later is plain ASCII text.
    text = data.decode("ascii", "ignore")
    return lxml.etree.fromstring(text, lxml.etree.HTMLParser(encoding="UTF-8"))
def get_chapters(story_id):
    """Yield a (number, title) pair for each entry of a story's chapter list.

    The list is read from the first chapter's page: every text fragment
    inside the ``select`` element whose name contains "chapter" is split at
    the first ". " into a chapter number and a title.
    """
    print("Retrieving chapter index...")
    index_url = "http://www.fanfiction.net/s/{}/{}/".format(story_id, 1)
    page = get_document(index_url)
    dropdown = selector("select[name*=chapter]")(page)[0]
    for entry in lxml.etree.ElementTextIterator(dropdown):
        pieces = entry.partition(". ")
        yield (int(pieces[0]), pieces[2])
def get_chapter_text(story_id, chapter):
    """Download one chapter page and return its story text as one string.

    The text fragments found under the ``#storytext`` element are joined
    with blank lines between them.
    """
    print("Retrieving text of chapter {}/{}".format(story_id, chapter))
    chapter_url = "http://www.fanfiction.net/s/{}/{}/".format(story_id, chapter)
    page = get_document(chapter_url)
    story_body = selector("#storytext")(page)[0]
    # Remove the <i>, <b> and <a> tags themselves before collecting text;
    # presumably so inline formatting does not break a paragraph into
    # separate fragments.
    lxml.etree.strip_tags(story_body, "i", "b", "a")
    fragments = lxml.etree.ElementTextIterator(story_body)
    return "\n\n".join(fragments)
def get_text_by_chapter(story_id, chapters):
    """Lazily produce a (number, title, text) triple for each chapter.

    ``chapters`` is an iterable of (number, title) pairs. The text of each
    chapter is fetched only when its triple is requested, and is prefixed
    with a "Chapter N: Title" heading.
    """
    for number, name in chapters:
        body = get_chapter_text(story_id, number)
        narration = "Chapter {}: {}\n\n{}".format(number, name, body)
        yield number, name, narration
def say(s, *a):
    """Run the ``say`` command, feeding it ``s`` on standard input.

    Args:
        s: Data written to the command's stdin.
        *a: Extra command-line arguments appended after ``say``.

    Raises:
        subprocess.CalledProcessError: If ``say`` exits with a non-zero
            status.
    """
    command = ["say"] + list(a)
    process = subprocess.Popen(command, stdin=subprocess.PIPE)
    process.communicate(s)
    # Report a failed run here instead of letting the caller stumble over a
    # missing or incomplete output file afterwards.
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command)
import mutagen.m4a
def dump_story(story_id, story_title="", story_author="", cover=None):
    """Generate one tagged m4a file per chapter of a story.

    Args:
        story_id: Story identifier used to build the page URLs; also used in
            the file names when ``story_title`` is empty.
        story_title: Optional title. Spoken at the start of chapter 1 and
            stored as the album tag.
        story_author: Optional author. Spoken as a "by ..." line at the
            start of chapter 1 and stored as the artist tag.
        cover: Optional value stored as the cover-art tag of every file.
    """
    chapters = list(get_chapters(story_id))
    chapter_count = len(chapters)
    for n, title, text in get_text_by_chapter(story_id, chapters):
        if n == 1:
            # Only the first chapter opens with the story title and byline.
            if story_author:
                text = "by " + story_author + "\n\n" + text
            if story_title:
                text = story_title + "\n\n" + text
        filename = "{}-{:03d}-{}.m4a".format(story_title or story_id, n, title)
        # A "/" coming from a title would be treated as a directory
        # separator and make the write fail, so keep the name flat.
        filename = filename.replace("/", "-")
        print("Writing " + filename)
        say(text, "-o", filename)
        print("Writing meta info")
        info = mutagen.m4a.M4A(filename)
        info["trkn"] = (n, chapter_count)
        info["\xa9nam"] = "Chapter {}: {}".format(n, title)
        if story_title:
            info["\xa9alb"] = story_title
        if story_author:
            info["\xa9ART"] = story_author
        if cover:
            info["covr"] = cover
        info["\xa9cmt"] = "Generated using Mac OS X 10.6's Speech Synthesis by a script available at https://gist.github.com/973183"
        info.save()
# Example run: narrate "Harry Potter and the Methods of Rationality", tagging
# each generated file with the artwork read from cover.jpg in the current
# directory.
with open("cover.jpg", "rb") as cover_file:
    # Read inside "with" so the image file is closed as soon as it is loaded.
    cover = mutagen.m4a.M4ACover(cover_file.read(), mutagen.m4a.M4ACover.FORMAT_JPEG)
dump_story(5782108, "Harry Potter and the Methods of Rationality", "Eliezer Yudkowsky", cover)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment