Skip to content

Instantly share code, notes, and snippets.

@gabalese
Last active December 22, 2015 00:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gabalese/6391443 to your computer and use it in GitHub Desktop.
Save gabalese/6391443 to your computer and use it in GitHub Desktop.
Raw script to build a quick and dirty preview from an existing full EPUB. Useful to send out previews of existing ebooks.
#! /usr/bin/env python
# by Gabriele Alese <gabriele@alese.it> / http://www.alese.it
# Released in Public Domain where applicable: http://creativecommons.org/publicdomain/zero/1.0/
# ***
# REQUIREMENTS:
# this script makes use of pyepub 2.0.9
# download here: https://github.com/gabalese/pyepub
# ***
# USAGE:
# $ python previewbuilder.py [files...]
# output will have the same filename, with "preview_" prepended
import sys
try:
from pyepub import EPUB
except ImportError:
print "pyepub module not found: see https://github.com/gabalese/pyepub"
sys.exit(1)
import mimetypes
import os
from StringIO import StringIO
import xml.etree.ElementTree as ET
def _is_external(reference):
    """Return True when *reference* points outside the EPUB archive."""
    return "://" in reference or reference.startswith("data:")


def previewfy(path):
    """Build a preview EPUB from the leading 20% of the spine of *path*.

    The selected spine documents are copied into a new EPUB together with
    every local resource they reference through a ``src`` attribute or an
    XHTML ``<link href="...">``. The result is handed to
    ``writetodisk`` under the name ``"preview_" + os.path.basename(path)``.

    path -- location of the source EPUB, opened with ``EPUB(path, "r")``.
    """
    # Python 2 stdlib module; imported once here instead of once per document.
    from htmlentitydefs import entitydefs

    epub = EPUB(path, "r")
    spine = epub.info["spine"]

    # Keep the first 20% of the spine, but never fewer than one document:
    # the bare percentage truncates to 0 for books with fewer than five spine
    # items, which used to yield a preview without any content.
    num = max(1, int(len(spine) / 100.00 * 20.00))

    # Resolve each selected spine idref to the href of its manifest entry.
    itemslist = [
        manifest["href"]
        for item in spine[:num]
        for manifest in epub.info["manifest"]
        if item["idref"] == manifest["id"]
    ]

    fakefile = StringIO()
    output = EPUB(fakefile, "w",
                  title=epub.info["metadata"]["title"],
                  language=epub.info["metadata"]["language"])

    # Archive paths of the resources referenced by the selected documents;
    # a set, so that a resource shared by several documents is added once.
    src = set()
    for i in itemslist:
        document = os.path.join(epub.root_folder, i)
        base = os.path.dirname(document)

        # One parser per document, set up so that named HTML entities
        # resolve even though the DTD itself is not loaded.
        parser = ET.XMLParser()
        parser.parser.UseForeignDTD(True)
        parser.entity.update(entitydefs)
        root = ET.parse(StringIO(epub.read(document)), parser)

        references = [x.attrib["src"] for x in root.findall(".//*[@src]")]
        references += [
            x.attrib["href"]
            for x in root.findall(".//{http://www.w3.org/1999/xhtml}link[@href]")
        ]
        for reference in references:
            # Remote and inline (data:) references are not archive members,
            # so there is nothing to copy for them.
            if _is_external(reference):
                continue
            src.add(os.path.normpath(os.path.join(base, reference)))

    # add non-part manifest items
    prefix = epub.root_folder + "/"
    for resource in sorted(src):
        # Strip the root folder only when it is a real leading prefix;
        # str.replace() would also remove later occurrences of it.
        if epub.root_folder and resource.startswith(prefix):
            target = resource[len(prefix):]
        else:
            target = resource
        # guess_type() yields None for unknown extensions.
        mimetype = mimetypes.guess_type(resource)[0] or "application/octet-stream"
        output.additem(epub.read(resource), target, mimetype)

    # add selected parts
    for i in itemslist:
        output.addpart(epub.read(os.path.join(epub.root_folder, i)), i, "application/xhtml+xml")

    output.close()
    output.writetodisk("preview_" + os.path.basename(path))
if __name__ == '__main__':
    # Every command-line argument is treated as the path of an EPUB to preview.
    input_paths = sys.argv[1:]
    for epub_path in input_paths:
        previewfy(epub_path)
@gabalese
Copy link
Author

Please note: I know the files generated by this script do not pass epubcheck. Maybe I'll amend that, but don't hold your breath.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment