Skip to content

Instantly share code, notes, and snippets.

@jamesoutterside
Last active September 7, 2022 22:42
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save jamesoutterside/3750447 to your computer and use it in GitHub Desktop.
Save jamesoutterside/3750447 to your computer and use it in GitHub Desktop.
django epub builder
# Copyright (c) 2012, Bin Tan
# This file is distributed under the BSD Licence. See python-epub-builder-license.txt for details.
# James Outterside - Modified code from http://code.google.com/p/python-epub-builder/source/browse/trunk/epub.py
# to use django templates instead of genshi
import itertools
import mimetypes
import os
import shutil
import subprocess
import uuid
import zipfile
from django.template import RequestContext,loader, Context
from lxml import etree
class TocMapNode:
    """One entry of the table-of-contents tree.

    Attributes:
        playOrder: position of the node in a pre-order walk of the tree
            (filled in by assignPlayOrder, 0 for the node it is called on).
        title: text shown for the entry.
        href: target the entry points at.
        children: child TocMapNode objects, in display order.
        depth: nesting level; a freshly created node starts at 0.
    """

    def __init__(self):
        self.playOrder = 0
        self.title = ''
        self.href = ''
        self.children = []
        self.depth = 0

    def assignPlayOrder(self):
        """Number this node and all its descendants in pre-order, from 0."""
        counter = 0
        # Explicit stack instead of recursion; children are pushed reversed
        # so that they are popped (and numbered) left to right.
        pending = [self]
        while pending:
            node = pending.pop()
            node.playOrder = counter
            counter += 1
            pending.extend(reversed(node.children))
class EpubItem:
    """A single file that goes into the book (image, HTML page or stylesheet).

    Attributes (all start out as empty strings):
        id: identifier of the item.
        srcPath: path of the source file to copy from, if any.
        destPath: path of the item inside the book.
        mimeType: media type of the item.
        html: in-memory HTML content, if any.
    """

    def __init__(self):
        # Strings are immutable, so sharing one '' between attributes is safe.
        self.id = self.srcPath = self.destPath = self.mimeType = self.html = ''
class EpubBook:
    """Collects the parts of an EPUB book and writes them out as a directory tree.

    Typical use: set the title, creators and metadata, register images, CSS
    and HTML pages, add spine and TOC entries, then call createBook()
    followed by the static createArchive() to zip the tree.  Generated files
    are rendered from Django templates under 'epub/create/'.
    """

    def __init__(self):
        self.loader = loader
        self.rootDir = ''
        self.UUID = uuid.uuid1()
        self.lang = 'en-US'
        self.title = ''
        self.creators = []      # list of (name, role)
        self.metaInfo = []      # list of (metaName, metaValue, metaAttrs)
        self.imageItems = {}    # destPath -> EpubItem
        self.htmlItems = {}     # destPath -> EpubItem
        self.cssItems = {}      # destPath -> EpubItem
        self.coverImage = None
        self.titlePage = None
        self.tocPage = None
        self.spine = []         # list of (order, item, linear)
        self.guide = {}         # type -> (href, title, type)
        self.tocMapRoot = TocMapNode()
        self.lastNodeAtDepth = {0: self.tocMapRoot}

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def __render(templateName, values):
        """Render the named Django template with *values* as its context."""
        return loader.get_template(templateName).render(Context(values))

    @staticmethod
    def _writeTextFile(path, text):
        """Write *text* to *path*, encoding non-byte strings as UTF-8.

        The file is opened in binary mode so the result does not depend on
        the platform default encoding, and it is closed even if the write
        fails.
        """
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        with open(path, 'wb') as fout:
            fout.write(text)

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------

    def setTitle(self, title):
        """Set the book title."""
        self.title = title

    def setLang(self, lang):
        """Set the book language (defaults to 'en-US')."""
        self.lang = lang

    def addCreator(self, name, role='aut'):
        """Record a creator as a (name, role) pair."""
        self.creators.append((name, role))

    def addMeta(self, metaName, metaValue, **metaAttrs):
        """Record an extra metadata element; keyword args become its attributes."""
        self.metaInfo.append((metaName, metaValue, metaAttrs))

    def getMetaTags(self):
        """Return a list of (beginTag, value, endTag) for every addMeta() entry.

        beginTag has the form '<dc:NAME opf:ATTR="VALUE" ...>' and endTag
        '</dc:NAME>'.  Attribute values are XML-escaped here because the
        content.opf template emits the begin tag without escaping.
        """
        from xml.sax.saxutils import escape

        tags = []
        for metaName, metaValue, metaAttr in self.metaInfo:
            attrs = ''.join(
                ' opf:%s="%s"' % (attrName, escape('%s' % attrValue, {'"': '&quot;'}))
                for attrName, attrValue in metaAttr.items()
            )
            tags.append(('<dc:%s%s>' % (metaName, attrs), metaValue, '</dc:%s>' % metaName))
        return tags

    # ------------------------------------------------------------------
    # Items
    # ------------------------------------------------------------------

    def getImageItems(self):
        """Return the image items sorted by id."""
        return sorted(self.imageItems.values(), key=lambda x: x.id)

    def getHtmlItems(self):
        """Return the HTML items sorted by id."""
        return sorted(self.htmlItems.values(), key=lambda x: x.id)

    def getCssItems(self):
        """Return the CSS items sorted by id."""
        return sorted(self.cssItems.values(), key=lambda x: x.id)

    def getAllItems(self):
        """Return every image, HTML and CSS item sorted by id."""
        everything = itertools.chain(
            self.imageItems.values(), self.htmlItems.values(), self.cssItems.values())
        return sorted(everything, key=lambda x: x.id)

    def addImage(self, srcPath, destPath):
        """Register an image to be copied from srcPath to destPath; return its item."""
        item = EpubItem()
        item.id = 'image_%d' % (len(self.imageItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = mimetypes.guess_type(destPath)[0]
        assert item.destPath not in self.imageItems, 'duplicate image path: %s' % destPath
        self.imageItems[destPath] = item
        return item

    def addHtmlForImage(self, imageItem):
        """Register an HTML page rendered from the image template for imageItem."""
        rendered = self.__render('epub/create/image.html', {'book': self, 'item': imageItem})
        return self.addHtml('', '%s.html' % imageItem.destPath, rendered)

    def addHtml(self, srcPath, destPath, html):
        """Register an HTML page (in-memory html or a file at srcPath); return its item."""
        item = EpubItem()
        item.id = 'html_%d' % (len(self.htmlItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.html = html
        item.mimeType = 'application/xhtml+xml'
        assert item.destPath not in self.htmlItems, 'duplicate html path: %s' % destPath
        self.htmlItems[item.destPath] = item
        return item

    def addCss(self, srcPath, destPath):
        """Register a stylesheet to be copied from srcPath to destPath; return its item."""
        item = EpubItem()
        item.id = 'css_%d' % (len(self.cssItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = 'text/css'
        assert item.destPath not in self.cssItems, 'duplicate css path: %s' % destPath
        self.cssItems[item.destPath] = item
        return item

    def addCover(self, srcPath):
        """Register srcPath as the cover image, stored as 'cover' + its extension."""
        assert not self.coverImage, 'cover image already set'
        _, ext = os.path.splitext(srcPath)
        destPath = 'cover%s' % ext
        self.coverImage = self.addImage(srcPath, destPath)
        # NOTE: generation of a separate cover page is currently disabled:
        #coverPage = self.addHtmlForImage(self.coverImage)
        #self.addSpineItem(coverPage, False, -300)
        #self.addGuideItem(coverPage.destPath, 'Cover', 'cover')

    # ------------------------------------------------------------------
    # Title page and TOC page
    # ------------------------------------------------------------------

    def __makeTitlePage(self):
        """Render the title page from its template unless html was supplied."""
        if self.titlePage.html:
            return
        self.titlePage.html = self.__render('epub/create/title-page.html', {'book': self})

    def addTitlePage(self, html=''):
        """Add 'title-page.html' to items, spine (order -200) and guide.

        With empty html the page is rendered from the title-page template
        when createBook() runs.
        """
        assert not self.titlePage, 'title page already added'
        self.titlePage = self.addHtml('', 'title-page.html', html)
        self.addSpineItem(self.titlePage, True, -200)
        self.addGuideItem('title-page.html', 'Title Page', 'title-page')

    def __makeTocPage(self):
        """Render the TOC page from its template."""
        self.tocPage.html = self.__render('epub/create/toc.html', {'book': self})

    def addTocPage(self):
        """Add 'toc.html' to items, spine (non-linear, order -100) and guide."""
        assert not self.tocPage, 'TOC page already added'
        self.tocPage = self.addHtml('', 'toc.html', '')
        self.addSpineItem(self.tocPage, False, -100)
        self.addGuideItem('toc.html', 'Table of Contents', 'toc')

    # ------------------------------------------------------------------
    # Spine and guide
    # ------------------------------------------------------------------

    def getSpine(self):
        """Return the (order, item, linear) spine entries sorted by order.

        Only the order is used as sort key, so entries sharing an order keep
        their insertion order instead of being compared item against item.
        """
        return sorted(self.spine, key=lambda entry: entry[0])

    def addSpineItem(self, item, linear=True, order=None):
        """Append an already registered HTML item to the spine.

        When order is None the item is placed after the current maximum
        order (or at 1 if the spine is empty).
        """
        assert item.destPath in self.htmlItems, 'spine item is not a registered html item'
        if order is None:
            highest = max(entry[0] for entry in self.spine) if self.spine else 0
            order = highest + 1
        self.spine.append((order, item, linear))

    def getGuide(self):
        """Return the (href, title, type) guide entries sorted by type."""
        return sorted(self.guide.values(), key=lambda x: x[2])

    def addGuideItem(self, href, title, type):
        """Record a guide reference; each type may be added only once."""
        assert type not in self.guide, 'duplicate guide type: %s' % type
        self.guide[type] = (href, title, type)

    # ------------------------------------------------------------------
    # TOC map
    # ------------------------------------------------------------------

    def getTocMapRoot(self):
        """Return the root node of the TOC tree."""
        return self.tocMapRoot

    def getTocMapHeight(self):
        """Return the deepest depth at which a TOC node has been added."""
        return max(self.lastNodeAtDepth.keys())

    def addTocMapNode(self, href, title, depth=None, parent=None):
        """Add a TOC entry and return its node.

        The parent is, in order of precedence: the given parent node; the
        most recently added node at depth - 1 when depth is given; the root.
        Raises KeyError if depth is given but no node exists at depth - 1.
        """
        node = TocMapNode()
        node.href = href
        node.title = title
        if parent is None:
            if depth is None:
                parent = self.tocMapRoot
            else:
                parent = self.lastNodeAtDepth[depth - 1]
        parent.children.append(node)
        node.depth = parent.depth + 1
        self.lastNodeAtDepth[node.depth] = node
        return node

    # ------------------------------------------------------------------
    # Writing the book
    # ------------------------------------------------------------------

    def makeDirs(self):
        """Create META-INF and OEBPS under rootDir; existing directories are fine."""
        for name in ('META-INF', 'OEBPS'):
            path = os.path.join(self.rootDir, name)
            try:
                os.makedirs(path)
            except OSError:
                # Only "already exists" is expected; anything else is real.
                if not os.path.isdir(path):
                    raise

    def __writeContainerXML(self):
        """Render META-INF/container.xml."""
        rendered = self.__render('epub/create/container.xml', {})
        self._writeTextFile(os.path.join(self.rootDir, 'META-INF', 'container.xml'), rendered)

    def __writeTocNCX(self):
        """Assign play orders to the TOC tree and render OEBPS/toc.ncx."""
        self.tocMapRoot.assignPlayOrder()
        rendered = self.__render('epub/create/toc.ncx', {'book': self})
        self._writeTextFile(os.path.join(self.rootDir, 'OEBPS', 'toc.ncx'), rendered)

    def __writeContentOPF(self):
        """Render OEBPS/content.opf."""
        rendered = self.__render('epub/create/content.opf', {'book': self})
        self._writeTextFile(os.path.join(self.rootDir, 'OEBPS', 'content.opf'), rendered)

    def __writeItems(self):
        """Write every item under OEBPS: in-memory html is written, others copied."""
        for item in self.getAllItems():
            print('%s %s' % (item.id, item.destPath))
            destFile = os.path.join(self.rootDir, 'OEBPS', item.destPath)
            # destPath may contain sub-directories that do not exist yet.
            destDir = os.path.dirname(destFile)
            if not os.path.isdir(destDir):
                os.makedirs(destDir)
            if item.html:
                self._writeTextFile(destFile, item.html)
            else:
                shutil.copyfile(item.srcPath, destFile)

    def __writeMimeType(self):
        """Write the 'mimetype' file (no trailing newline)."""
        self._writeTextFile(os.path.join(self.rootDir, 'mimetype'), 'application/epub+zip')

    @staticmethod
    def __listManifestItems(contentOPFPath):
        """Return the href of every manifest item in the given content.opf."""
        tree = etree.parse(contentOPFPath)
        return tree.xpath("//opf:manifest/opf:item/@href",
                          namespaces={'opf': 'http://www.idpf.org/2007/opf'})

    @staticmethod
    def createArchive(rootDir, outputPath):
        """Zip the book tree under rootDir into outputPath.

        'mimetype' is added first and stored uncompressed; container.xml,
        content.opf and every manifest item follow, deflated.  Files are
        addressed by full path with an explicit archive name, so the process
        working directory is never changed.
        """
        contentOPF = os.path.join('OEBPS', 'content.opf')
        members = [os.path.join('META-INF', 'container.xml'), contentOPF]
        for itemPath in EpubBook.__listManifestItems(os.path.join(rootDir, contentOPF)):
            members.append(os.path.join('OEBPS', itemPath))
        with zipfile.ZipFile(outputPath, 'w') as fout:
            fout.write(os.path.join(rootDir, 'mimetype'), 'mimetype', zipfile.ZIP_STORED)
            for member in members:
                fout.write(os.path.join(rootDir, member), member, zipfile.ZIP_DEFLATED)

    @staticmethod
    def checkEpub(checkerPath, epubPath):
        """Run the epubcheck jar at checkerPath on epubPath; return java's exit code.

        The argument list is executed directly (no shell): combined with
        shell=True, POSIX systems would run a bare 'java' and drop the rest.
        """
        return subprocess.call(['java', '-jar', checkerPath, epubPath])

    def createBook(self, rootDir):
        """Render pending pages and write the complete book tree under rootDir."""
        if self.titlePage:
            self.__makeTitlePage()
        if self.tocPage:
            self.__makeTocPage()
        self.rootDir = rootDir
        self.makeDirs()
        self.__writeMimeType()
        self.__writeItems()
        self.__writeContainerXML()
        self.__writeContentOPF()
        self.__writeTocNCX()
def test():
    """Build a small sample book, zip it and run epubcheck on the result."""

    def getMinimalHtml(text):
        """Return a minimal XHTML page using text as both title and body."""
        page = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHtml 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>%s</title></head>
<body><p>%s</p></body>
</html>
"""
        return page % (text, text)

    book = EpubBook()
    book.setTitle('Most Wanted Tips for Aspiring Young Pirates')
    for creator in ('Monkey D Luffy', 'Guybrush Threepwood'):
        book.addCreator(creator)
    book.addMeta('contributor', 'Smalltalk80', role = 'bkp')
    book.addMeta('date', '2010', event = 'publication')
    book.addTitlePage()
    book.addTocPage()
    book.addCover(r'D:\epub\blank.png')
    book.addCss(r'main.css', 'main.css')

    # (file name, page text, TOC label, TOC depth -- None means top level)
    chapters = [
        ('1.html', 'Chapter 1', '1', None),
        ('2.html', 'Section 1.1', '1.1', 2),
        ('3.html', 'Subsection 1.1.1', '1.1.1', 3),
        ('4.html', 'Section 1.2', '1.2', 2),
        ('5.html', 'Chapter 2', '2', None),
    ]
    pages = [book.addHtml('', fileName, getMinimalHtml(text))
             for fileName, text, _, _ in chapters]
    for page in pages:
        book.addSpineItem(page)

    # The TOC map is built here by depth.  The equivalent explicit form
    # passes the parent node instead, e.g.
    #   t1 = book.addTocMapNode(n1.destPath, '1')
    #   t11 = book.addTocMapNode(n11.destPath, '1.1', parent = t1)
    for page, (_, _, label, depth) in zip(pages, chapters):
        book.addTocMapNode(page.destPath, label, depth)

    rootDir = r'd:\epub\test'
    epubPath = rootDir + '.epub'
    book.createBook(rootDir)
    EpubBook.createArchive(rootDir, epubPath)
    EpubBook.checkEpub('epubcheck-1.0.5.jar', epubPath)


if __name__ == '__main__':
    test()
#epub/create/toc.ncx
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
{% load epub_tags %}{# NCX navigation file for the book; epub_tags is a custom tag library not shown here #}
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/"
version="2005-1">
<head>
<meta name="dtb:uid" content="urn:uuid:{{book.UUID}}"/>
<meta name="dtb:depth" content="{{book.getTocMapHeight}}"/>{# deepest level at which a TOC node was added #}
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>{{book.title}}</text>
</docTitle>
<navMap>
{% for node in book.getTocMapRoot.children %}{# top-level TOC entries; the root node itself is not emitted #}
{% toc_nav_children_output node %}{# custom tag; presumably renders the navPoint for node and its children - confirm in epub_tags #}
{% endfor %}
</navMap>
</ncx>
#epub/create/content.opf
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<opf:package xmlns:opf="http://www.idpf.org/2007/opf"
xmlns:dc="http://purl.org/dc/elements/1.1/"
unique-identifier="bookid" version="2.0">
<opf:metadata >{# identifier, language, title, creators, extra meta tags and optional cover reference #}
<dc:identifier id="bookid">urn:uuid:{{book.UUID}}</dc:identifier>
<dc:language>{{book.lang}}</dc:language>
<dc:title>{{book.title}}</dc:title>
{% for creator in book.creators %}{# each creator is a (name, role) pair: creator.0 is the name, creator.1 the role #}
<dc:creator opf:role="{{creator.1}}">{{creator.0}}</dc:creator>
{% endfor %}
{% for tag in book.getMetaTags %}{# tag is (begin tag, value, end tag); the tags are pre-built markup, hence the safe filter #}
{{tag.0|safe}}{{tag.1}}{{tag.2|safe}}
{% endfor %}
{% if book.coverImage %}{# only emitted when a cover image was added #}
<opf:meta name="cover" content="{{book.coverImage.id}}"/>
{% endif %}
</opf:metadata>
<opf:manifest>
<opf:item id="ncxtoc" media-type="application/x-dtbncx+xml" href="toc.ncx"/>
{% for item in book.getAllItems %}{# every image, HTML and CSS item, sorted by id #}
<opf:item id="{{item.id}}" media-type="{{item.mimeType}}" href="{{item.destPath}}"/>
{% endfor %}
</opf:manifest>
<opf:spine toc="ncxtoc">
{% for sitem in book.getSpine %}{# sitem is (order, item, linear); yesno renders linear as yes or no #}
<opf:itemref idref="{{sitem.1.id}}" linear="{{sitem.2|yesno}}"/>
{% endfor %}
</opf:spine>
{% if book.guide %}{# the guide element is omitted when no guide items were added #}
<opf:guide>
{% for gitem in book.getGuide %}{# gitem is (href, title, type) #}
<opf:reference href="{{gitem.0}}" type="{{gitem.2}}" title="{{gitem.1}}"/>
{% endfor %}
</opf:guide>
{% endif %}
</opf:package>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment