Skip to content

Instantly share code, notes, and snippets.

@airtonix
Forked from jamesoutterside/django_epub.py
Created April 11, 2013 20:10
Show Gist options
  • Save airtonix/5366768 to your computer and use it in GitHub Desktop.
Save airtonix/5366768 to your computer and use it in GitHub Desktop.
# Copyright (c) 2012, Bin Tan
# This file is distributed under the BSD Licence. See python-epub-builder-license.txt for details.
# James Outterside - Modified code from http://code.google.com/p/python-epub-builder/source/browse/trunk/epub.py
# to use Django templates instead of Genshi.
import itertools
import mimetypes
import os
import shutil
import subprocess
import uuid
import zipfile
from django.template import RequestContext,loader, Context
from lxml import etree
class TocMapNode:
    """One entry of the table-of-contents tree.

    Attributes:
        playOrder: position of the node in a pre-order walk of the tree,
            filled in by assignPlayOrder() (0 until then).
        title: label of the entry.
        href: target the entry points at.
        children: child TocMapNode objects, in document order.
        depth: nesting level; a freshly created node has depth 0.
    """

    def __init__(self):
        self.playOrder = 0
        self.title = ''
        self.href = ''
        self.children = []
        self.depth = 0

    def assignPlayOrder(self):
        """Number this node and all descendants in pre-order, starting at 0."""
        position = 0
        pending = [self]
        while pending:
            node = pending.pop()
            node.playOrder = position
            position += 1
            # Push children reversed so the first child is popped (and
            # therefore numbered) first, matching a depth-first walk.
            pending.extend(reversed(node.children))
class EpubItem:
    """A single file that belongs to the book.

    Attributes (all start out as empty strings):
        id: identifier of the item.
        srcPath: path the content is copied from.
        destPath: path of the item inside the book.
        mimeType: media type of the item.
        html: markup held in memory for the item.
    """

    def __init__(self):
        # Strings are immutable, so sharing one '' between the fields is safe.
        self.id = self.srcPath = self.destPath = self.mimeType = self.html = ''
class EpubBook:
    """In-memory description of an EPUB book that can be written to disk.

    Items (images, HTML pages, stylesheets) are registered with the add*
    methods, ordered with addSpineItem() and linked into the table of
    contents with addTocMapNode().  createBook() then renders the Django
    templates under ``epub/create/`` and writes the unpacked book into a
    directory; createArchive() zips that directory.

    The code runs unchanged on Python 2 and Python 3.
    """

    def __init__(self):
        # Kept as an attribute for backward compatibility; rendering itself
        # goes through the module-level ``loader``.
        self.loader = loader
        self.rootDir = ''
        self.UUID = uuid.uuid1()
        self.lang = 'en-US'
        self.title = ''
        self.creators = []       # (name, role) tuples
        self.metaInfo = []       # (name, value, attrs-dict) tuples
        self.imageItems = {}     # destPath -> EpubItem
        self.htmlItems = {}      # destPath -> EpubItem
        self.cssItems = {}       # destPath -> EpubItem
        self.coverImage = None
        self.titlePage = None
        self.tocPage = None
        self.spine = []          # (order, item, linear) tuples
        self.guide = {}          # type -> (href, title, type)
        self.tocMapRoot = TocMapNode()
        # Most recently added TOC node for each depth; depth 0 is the root.
        self.lastNodeAtDepth = {0: self.tocMapRoot}

    # -- private helpers ---------------------------------------------------

    @staticmethod
    def __render(templateName, values):
        """Render the Django template *templateName* with *values* as context."""
        return loader.get_template(templateName).render(Context(values))

    @staticmethod
    def __ensureDir(path):
        """Create directory *path* (and parents) unless it already exists.

        Raises:
            OSError: if the directory does not exist and cannot be created.
        """
        try:
            os.makedirs(path)
        except OSError:
            # Only "already there" is harmless; anything else (permissions,
            # a file in the way, ...) must not be hidden.
            if not os.path.isdir(path):
                raise

    @staticmethod
    def __writeFile(path, data):
        """Write *data* to *path*; text is encoded as UTF-8.

        Binary mode is used so that byte strings are written unchanged and
        unicode text containing non-ASCII characters does not fail on
        Python 2.  The handle is closed even if the write raises.
        """
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with open(path, 'wb') as fout:
            fout.write(data)

    # -- metadata ----------------------------------------------------------

    def setTitle(self, title):
        """Set the book title."""
        self.title = title

    def setLang(self, lang):
        """Set the book language (defaults to 'en-US')."""
        self.lang = lang

    def addCreator(self, name, role='aut'):
        """Record a creator as a (name, role) pair."""
        self.creators.append((name, role))

    def addMeta(self, metaName, metaValue, **metaAttrs):
        """Record a metadata entry; keyword arguments become its attributes."""
        self.metaInfo.append((metaName, metaValue, metaAttrs))

    def getMetaTags(self):
        """Return the metadata as (beginTag, value, endTag) string triples.

        Each attribute recorded with addMeta() is emitted as
        ``opf:<name>="<value>"`` inside the ``<dc:...>`` begin tag.
        NOTE(review): names and values are interpolated as-is, without XML
        escaping - confirm the templates or callers take care of that.
        """
        tags = []
        for metaName, metaValue, metaAttrs in self.metaInfo:
            # .items() instead of .iteritems(): works on Python 2 and 3.
            attrs = ''.join(' opf:%s="%s"' % pair for pair in metaAttrs.items())
            tags.append(('<dc:%s%s>' % (metaName, attrs),
                         metaValue,
                         '</dc:%s>' % metaName))
        return tags

    # -- items -------------------------------------------------------------

    def getImageItems(self):
        """Return the image items sorted by id."""
        return sorted(self.imageItems.values(), key=lambda x: x.id)

    def getHtmlItems(self):
        """Return the HTML items sorted by id."""
        return sorted(self.htmlItems.values(), key=lambda x: x.id)

    def getCssItems(self):
        """Return the CSS items sorted by id."""
        return sorted(self.cssItems.values(), key=lambda x: x.id)

    def getAllItems(self):
        """Return every registered item (image, HTML, CSS) sorted by id."""
        everything = itertools.chain(self.imageItems.values(),
                                     self.htmlItems.values(),
                                     self.cssItems.values())
        return sorted(everything, key=lambda x: x.id)

    def addImage(self, srcPath, destPath):
        """Register the image at *srcPath* to be stored as *destPath*.

        The MIME type is guessed from *destPath* and is None when
        ``mimetypes`` cannot guess it.  Returns the new EpubItem.
        """
        item = EpubItem()
        item.id = 'image_%d' % (len(self.imageItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = mimetypes.guess_type(destPath)[0]
        assert item.destPath not in self.imageItems, \
            'image already registered: %s' % destPath
        self.imageItems[destPath] = item
        return item

    def addHtmlForImage(self, imageItem):
        """Add an HTML page, rendered from image.html, that wraps *imageItem*.

        The page is stored as ``<image destPath>.html``.  Returns the new item.
        """
        rendered = self.__render('epub/create/image.html',
                                 {'book': self, 'item': imageItem})
        return self.addHtml('', '%s.html' % imageItem.destPath, rendered)

    def addHtml(self, srcPath, destPath, html):
        """Register an HTML page to be stored as *destPath*.

        When *html* is non-empty it is written out by createBook();
        otherwise the file is copied from *srcPath*.  Returns the new item.
        """
        item = EpubItem()
        item.id = 'html_%d' % (len(self.htmlItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.html = html
        item.mimeType = 'application/xhtml+xml'
        assert item.destPath not in self.htmlItems, \
            'HTML page already registered: %s' % destPath
        self.htmlItems[item.destPath] = item
        return item

    def addCss(self, srcPath, destPath):
        """Register the stylesheet at *srcPath* to be stored as *destPath*."""
        item = EpubItem()
        item.id = 'css_%d' % (len(self.cssItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = 'text/css'
        assert item.destPath not in self.cssItems, \
            'stylesheet already registered: %s' % destPath
        self.cssItems[item.destPath] = item
        return item

    def addCover(self, srcPath):
        """Register *srcPath* as the cover image, stored as ``cover<ext>``.

        Only the image is registered: no cover HTML page, spine entry or
        guide entry is created (that part was disabled in the original code).
        May be called once per book.
        """
        assert not self.coverImage, 'cover already set'
        _, ext = os.path.splitext(srcPath)
        self.coverImage = self.addImage(srcPath, 'cover%s' % ext)

    # -- generated pages ---------------------------------------------------

    def __makeTitlePage(self):
        """Render the title page unless explicit HTML was supplied for it."""
        if self.titlePage.html:
            return
        self.titlePage.html = self.__render('epub/create/title-page.html',
                                            {'book': self})

    def addTitlePage(self, html=''):
        """Add the title page (spine order -200, linear) and its guide entry.

        With empty *html* the page is rendered from a template by createBook().
        """
        assert not self.titlePage, 'title page already added'
        self.titlePage = self.addHtml('', 'title-page.html', html)
        self.addSpineItem(self.titlePage, True, -200)
        self.addGuideItem('title-page.html', 'Title Page', 'title-page')

    def __makeTocPage(self):
        """Render the HTML table-of-contents page."""
        self.tocPage.html = self.__render('epub/create/toc.html', {'book': self})

    def addTocPage(self):
        """Add the TOC page (spine order -100, non-linear) and its guide entry."""
        assert not self.tocPage, 'TOC page already added'
        self.tocPage = self.addHtml('', 'toc.html', '')
        self.addSpineItem(self.tocPage, False, -100)
        self.addGuideItem('toc.html', 'Table of Contents', 'toc')

    # -- spine and guide ---------------------------------------------------

    def getSpine(self):
        """Return the spine entries (order, item, linear) sorted by order.

        Only the order value is used as the sort key, so entries that share
        an order keep their insertion order instead of falling back to
        comparing the item objects (which raises TypeError on Python 3).
        """
        return sorted(self.spine, key=lambda entry: entry[0])

    def addSpineItem(self, item, linear=True, order=None):
        """Append a registered HTML *item* to the spine.

        When *order* is None the item gets the highest order used so far
        plus one (1 for an empty spine).
        """
        assert item.destPath in self.htmlItems, \
            'not a registered HTML page: %s' % item.destPath
        if order is None:
            if self.spine:
                order = max(existing for existing, _, _ in self.spine) + 1
            else:
                order = 1
        self.spine.append((order, item, linear))

    def getGuide(self):
        """Return the guide entries (href, title, type) sorted by type."""
        return sorted(self.guide.values(), key=lambda x: x[2])

    def addGuideItem(self, href, title, type):
        """Register a guide entry; each *type* may be used only once."""
        assert type not in self.guide, 'guide type already used: %s' % type
        self.guide[type] = (href, title, type)

    # -- table of contents -------------------------------------------------

    def getTocMapRoot(self):
        """Return the root node of the TOC tree."""
        return self.tocMapRoot

    def getTocMapHeight(self):
        """Return the greatest depth at which a TOC node has been added."""
        return max(self.lastNodeAtDepth.keys())

    def addTocMapNode(self, href, title, depth=None, parent=None):
        """Add a TOC entry and return its node.

        The parent is chosen as follows: an explicit *parent* wins; else
        with *depth* given, the node most recently added at ``depth - 1``
        (KeyError if there is none); else the root.
        """
        node = TocMapNode()
        node.href = href
        node.title = title
        if parent is None:
            if depth is None:
                parent = self.tocMapRoot
            else:
                parent = self.lastNodeAtDepth[depth - 1]
        parent.children.append(node)
        node.depth = parent.depth + 1
        self.lastNodeAtDepth[node.depth] = node
        return node

    # -- writing -----------------------------------------------------------

    def makeDirs(self):
        """Create ``META-INF`` and ``OEBPS`` under rootDir if missing.

        Raises:
            OSError: if a directory is missing and cannot be created.
        """
        for name in ('META-INF', 'OEBPS'):
            self.__ensureDir(os.path.join(self.rootDir, name))

    def __writeContainerXML(self):
        """Render and write META-INF/container.xml."""
        self.__writeFile(os.path.join(self.rootDir, 'META-INF', 'container.xml'),
                         self.__render('epub/create/container.xml', {}))

    def __writeTocNCX(self):
        """Assign play orders to the TOC tree, then write OEBPS/toc.ncx."""
        self.tocMapRoot.assignPlayOrder()
        self.__writeFile(os.path.join(self.rootDir, 'OEBPS', 'toc.ncx'),
                         self.__render('epub/create/toc.ncx', {'book': self}))

    def __writeContentOPF(self):
        """Render and write OEBPS/content.opf."""
        self.__writeFile(os.path.join(self.rootDir, 'OEBPS', 'content.opf'),
                         self.__render('epub/create/content.opf', {'book': self}))

    def __writeItems(self):
        """Write every item below OEBPS: in-memory HTML, or a copy of srcPath."""
        for item in self.getAllItems():
            # Same progress output as before, spelled so that it is valid on
            # both Python 2 and Python 3.
            print('%s %s' % (item.id, item.destPath))
            destination = os.path.join(self.rootDir, 'OEBPS', item.destPath)
            # destPath may contain sub-directories (e.g. 'images/a.png').
            self.__ensureDir(os.path.dirname(destination))
            if item.html:
                self.__writeFile(destination, item.html)
            else:
                shutil.copyfile(item.srcPath, destination)

    def __writeMimeType(self):
        """Write the ``mimetype`` file (no trailing newline)."""
        self.__writeFile(os.path.join(self.rootDir, 'mimetype'),
                         'application/epub+zip')

    @staticmethod
    def __listManifestItems(contentOPFPath):
        """Return the href of every manifest item in the given content.opf."""
        tree = etree.parse(contentOPFPath)
        return tree.xpath("//opf:manifest/opf:item/@href",
                          namespaces={'opf': 'http://www.idpf.org/2007/opf'})

    @staticmethod
    def createArchive(rootDir, outputPath):
        """Zip the unpacked book in *rootDir* into *outputPath*.

        ``mimetype`` is added first and stored uncompressed; container.xml,
        content.opf and every file listed in the content.opf manifest follow,
        deflated.  Archive names are passed explicitly, so the process
        working directory is never changed, and the archive is closed even
        when adding a file fails.
        """
        contentOPF = os.path.join('OEBPS', 'content.opf')
        members = [os.path.join('META-INF', 'container.xml'), contentOPF]
        manifest = EpubBook.__listManifestItems(os.path.join(rootDir, contentOPF))
        members.extend(os.path.join('OEBPS', href) for href in manifest)

        with zipfile.ZipFile(outputPath, 'w') as archive:
            archive.write(os.path.join(rootDir, 'mimetype'), 'mimetype',
                          compress_type=zipfile.ZIP_STORED)
            for member in members:
                archive.write(os.path.join(rootDir, member), member,
                              compress_type=zipfile.ZIP_DEFLATED)

    @staticmethod
    def checkEpub(checkerPath, epubPath):
        """Run ``java -jar <checkerPath> <epubPath>`` and return its exit code.

        The argument list is executed directly, without a shell: combined
        with shell=True, POSIX systems would run a bare ``java`` and drop
        the remaining arguments.
        """
        return subprocess.call(['java', '-jar', checkerPath, epubPath])

    def createBook(self, rootDir):
        """Render all generated pages and write the unpacked book to *rootDir*."""
        if self.titlePage:
            self.__makeTitlePage()
        if self.tocPage:
            self.__makeTocPage()
        self.rootDir = rootDir
        self.makeDirs()
        self.__writeMimeType()
        self.__writeItems()
        self.__writeContainerXML()
        self.__writeContentOPF()
        self.__writeTocNCX()
def test():
    """Build, zip and check a small sample book.

    Manual smoke test: it relies on the hard-coded Windows paths below, on
    the Django templates being loadable and on an epubcheck jar being
    available, so it is not suitable for automated runs.
    """
    def getMinimalHtml(text):
        return """<!DOCTYPE html PUBLIC "-//W3C//DTD XHtml 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>%s</title></head>
<body><p>%s</p></body>
</html>
""" % (text, text)

    book = EpubBook()
    book.setTitle('Most Wanted Tips for Aspiring Young Pirates')
    for creator in ('Monkey D Luffy', 'Guybrush Threepwood'):
        book.addCreator(creator)
    book.addMeta('contributor', 'Smalltalk80', role = 'bkp')
    book.addMeta('date', '2010', event = 'publication')
    book.addTitlePage()
    book.addTocPage()
    book.addCover(r'D:\epub\blank.png')
    book.addCss(r'main.css', 'main.css')

    # Pages are stored as 1.html .. 5.html, in this order.
    headings = ('Chapter 1', 'Section 1.1', 'Subsection 1.1.1',
                'Section 1.2', 'Chapter 2')
    pages = [book.addHtml('', '%d.html' % number, getMinimalHtml(heading))
             for number, heading in enumerate(headings, 1)]
    for page in pages:
        book.addSpineItem(page)

    # TOC entries as (label, depth); a depth of None attaches the entry to
    # the root.  The tree can also be built by passing parent nodes, e.g.
    #   top = book.addTocMapNode(pages[0].destPath, '1')
    #   book.addTocMapNode(pages[1].destPath, '1.1', parent = top)
    tocEntries = (('1', None), ('1.1', 2), ('1.1.1', 3), ('1.2', 2), ('2', None))
    for page, (label, depth) in zip(pages, tocEntries):
        book.addTocMapNode(page.destPath, label, depth)

    rootDir = r'd:\epub\test'
    epubPath = rootDir + '.epub'
    book.createBook(rootDir)
    EpubBook.createArchive(rootDir, epubPath)
    EpubBook.checkEpub('epubcheck-1.0.5.jar', epubPath)
# Run the sample build only when the file is executed as a script.
if '__main__' == __name__:
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment