jeremyBanks/cover.jpg

## cover.jpg

      
    Raw
  

              cover.jpg
            
          
## ffn_audiobook_generator.py
#!/usr/bin/env python2.7
import urllib2
import subprocess

import lxml.cssselect
import lxml.etree

import mutagen

selector = lxml.cssselect.CSSSelector

def get_document(url):
    """Retrieves a URL and parses the response as an HTML document.

    The response body is reduced to its ASCII bytes (anything else is
    dropped) before being handed to lxml's HTML parser.  Returns whatever
    lxml.etree.fromstring produces for that markup.
    """

    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection promptly instead of waiting for the
        # response object to be garbage collected.
        response.close()

    # Non-ASCII bytes are discarded rather than decoded.
    markup = data.decode("ascii", "ignore")

    return lxml.etree.fromstring(markup, lxml.etree.HTMLParser(encoding="UTF-8"))

def get_chapters(story_id):
    """Yields (number, title) for each chapter listed on a story's first page.

    The chapter list is read from the first element matching
    select[name*=chapter]; each text entry inside it is expected to look
    like "<number>. <title>".
    """

    print("Retrieving chapter index...")

    document = get_document("http://www.fanfiction.net/s/{}/{}/"
                            .format(story_id, 1))
    chapter_selector = selector("select[name*=chapter]")(document)[0]

    for label in lxml.etree.ElementTextIterator(chapter_selector):
        # The iterator yields every text node, including any whitespace
        # between the option tags; int() would raise on those, so skip them.
        if not label.strip():
            continue

        n, _, title = label.partition(". ")
        yield (int(n), title)

def get_chapter_text(story_id, chapter):
    """Fetches one chapter page and returns its story text as one string.

    The text nodes found under the #storytext element are joined with
    blank lines between them.
    """

    print("Retrieving text of chapter {}/{}".format(story_id, chapter))

    url = "http://www.fanfiction.net/s/{}/{}/".format(story_id, chapter)
    page = get_document(url)

    find_story_text = selector("#storytext")
    story_node = find_story_text(page)[0]

    # Unwrap inline markup so its text merges into the surrounding text
    # instead of being emitted as separate fragments.
    lxml.etree.strip_tags(story_node, "i", "b", "a")

    fragments = lxml.etree.ElementTextIterator(story_node)
    return "\n\n".join(fragments)

def get_text_by_chapter(story_id, chapters):
    """Yields (number, title, body) for each chapter in a story.

    chapters is an iterable of (number, title) pairs.  Each body is the
    chapter's text prefixed with a "Chapter <number>: <title>" heading, and
    is fetched lazily as the generator is consumed.
    """

    template = "Chapter {}: {}\n\n{}"

    for entry in chapters:
        number, heading = entry
        body = get_chapter_text(story_id, number)
        yield number, heading, template.format(number, heading, body)

def say(s, *a):
    """Runs the say command, feeding it s on stdin and passing a as arguments.

    Blocks until the command has finished; its exit status is not checked.
    """

    command = ["say"]
    command.extend(a)

    process = subprocess.Popen(command, stdin=subprocess.PIPE)
    process.communicate(s)

import mutagen.m4a

def dump_story(story_id, story_title="", story_author="", cover=None):
    """Generates a sequence of m4a files for a story.

    One file per chapter is written to the current directory, named
    "<story_title or story_id>-<chapter number>-<chapter title>.m4a", and
    then tagged with track number, chapter name and a comment, plus album,
    artist and cover art when story_title, story_author and cover are given.
    The first chapter's audio is prefixed with the story title and author
    when they are provided.
    """

    chapters = list(get_chapters(story_id))
    chapter_texts = get_text_by_chapter(story_id, chapters)

    for n, title, text in chapter_texts:
        if n == 1:
            # Only the first chapter gets the spoken title/author intro.
            if story_author:
                text = "by " + story_author + "\n\n" + text

            if story_title:
                text = story_title + "\n\n" + text

        filename = "{}-{:03d}-{}.m4a".format(story_title or story_id, n, title)
        # A "/" in a title would be treated as a directory separator and make
        # both the say output and the tagging step fail, so replace it.
        filename = filename.replace("/", "-")

        print("Writing {}".format(filename))
        say(text, "-o", filename)
        print("Writing meta info")

        info = mutagen.m4a.M4A(filename)
        # Track number as (this chapter, total chapters).
        info["trkn"] = (n, len(chapters))

        info["\xa9nam"] = "Chapter {}: {}".format(n, title)

        if story_title:
            info["\xa9alb"] = story_title

        if story_author:
            info["\xa9ART"] = story_author

        if cover:
            info["covr"] = cover

        info["\xa9cmt"] = "Generated using Mac OS X 10.6's Speech Synthesis by a script available at https://gist.github.com/973183"

        info.save()

# what I'm doing
# Read the artwork inside a with-block so the file handle is closed as soon
# as its bytes have been loaded, before the long-running story dump starts.
with open("cover.jpg", "rb") as cover_file:
    cover = mutagen.m4a.M4ACover(cover_file.read(), mutagen.m4a.M4ACover.FORMAT_JPEG)
dump_story(5782108, "Harry Potter and the Methods of Rationality", "Eliezer Yudkowsky", cover)
	#!/usr/bin/env python2.7
	import urllib2
	import subprocess

	import lxml.cssselect
	import lxml.etree

	import mutagen

	selector = lxml.cssselect.CSSSelector

	def get_document(url):
	"""Retrieves a URL and parses the response as an HTML document."""

	data = urllib2.urlopen(url).read()

	return lxml.etree.fromstring(data.decode("ascii", "ignore"), lxml.etree.HTMLParser(encoding="UTF-8"))

	def get_chapters(story_id):
	print "Retrieving chapter index..."

	document = get_document("http://www.fanfiction.net/s/{}/{}/"
	.format(story_id, 1))
	chapter_selector = selector("select[name*=chapter]")(document)[0]

	for label in lxml.etree.ElementTextIterator(chapter_selector):
	n, _, title = label.partition(". ")
	yield (int(n), title)

	def get_chapter_text(story_id, chapter):
	print "Retrieving text of chapter {}/{}".format(story_id, chapter)

	document = get_document("http://www.fanfiction.net/s/{}/{}/"
	.format(story_id, chapter))

	text_element = selector("#storytext")(document)[0]
	lxml.etree.strip_tags(text_element, "i", "b", "a")
	return "\n\n".join(lxml.etree.ElementTextIterator(text_element))

	def get_text_by_chapter(story_id, chapters):
	"""Yields (number, title, body) for each chapter in a story."""

	for n, title in chapters:
	yield n, title, "Chapter {}: {}\n\n{}".format(n, title, get_chapter_text(story_id, n))

	def say(s, *a):
	"""Executes the say command with the specified data and arguments."""

	subprocess.Popen(["say"] + list(a), stdin=subprocess.PIPE).communicate(s)

	import mutagen.m4a

	def dump_story(story_id, story_title="", story_author="", cover=None):
	"""Generates a sequence of m4a files for a story."""

	chapters = list(get_chapters(story_id))
	methods_text = get_text_by_chapter(story_id, chapters)

	for n, title, text in methods_text:
	if n == 1:
	if story_author:
	text = "by " + story_author + "\n\n" + text

	if story_title:
	text = story_title + "\n\n" + text

	filename = "{}-{:03d}-{}.m4a".format(story_title or story_id, n, title)

	print "Writing", filename
	say(text, "-o", filename)
	print "Writing meta info"

	info = mutagen.m4a.M4A(filename)
	info["trkn"] = (n, len(chapters))

	info["\xa9nam"] = "Chapter {}: {}".format(n, title)

	if story_title:
	info["\xa9alb"] = story_title

	if story_author:
	info["\xa9ART"] = story_author

	if cover:
	info["covr"] = cover

	info["\xa9cmt"] = "Generated using Mac OS X 10.6's Speech Synthesis by a script available at https://gist.github.com/973183"

	info.save()

	# what I'm doing
	cover = mutagen.m4a.M4ACover(open("cover.jpg", "rb").read(), mutagen.m4a.M4ACover.FORMAT_JPEG)
	dump_story(5782108, "Harry Potter and the Methods of Rationality", "Eliezer Yudkowsky", cover)