Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Create EPUB files with Python
<?xml version='1.0' encoding='UTF-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
</rootfiles>
</container>
<?xml version='1.0' encoding='UTF-8'?>
<package xmlns="http://www.idpf.org/2007/opf" prefix="rendition: http://www.idpf.org/vocab/rendition/#" unique-identifier="uuid_id" version="3.0">
<metadata xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/">
<meta property="dcterms:modified"/>
<meta name="cover" content="cover"/>
<dc:title/>
<dc:creator/>
<dc:date/>
<dc:identifier id="uuid_id" opf:scheme="uuid"/>
<dc:language/>
</metadata>
<manifest>
<item href="stylesheet.css" id="stylesheet" media-type="text/css"/>
<item href="toc.ncx" id="toc" media-type="application/x-dtbncx+xml"/>
<item href="cover.png" id="cover" media-type="image/png" properties="cover-image"/>
</manifest>
<spine toc="toc"/>
</package>
#!/usr/bin/env python3
"""
Create Epub files.
This code was designed to provide a very simple and straight-forward API for
creating epub files, by sacrificing most of the versatility of the format.
Example usage:
>>> book = Book(title='Example Book', author='John Doe')
>>> with open('cover.png', 'br') as file:
...     book.add_cover(file.read())
>>> with open('style.css') as file:
...     book.add_stylesheet(file.read())
>>> book.add_page(title='First Page', content='some text')
>>> chapter = book.add_page(title='First Chapter', content='more text')
>>> book.add_page(
...     title='Sub-Page 1',
...     content='first subpage of the chapter',
...     parent=chapter)
>>> with open('image.jpg', 'br') as file:
...     book.add_image('image.jpg', file.read())
>>> book.save('example.epub')
"""
###############################################################################
# Module Imports
###############################################################################
import arrow
import collections
import itertools
import logging
import lxml.etree
import lxml.html
import pathlib
import pkgutil
import tempfile
import uuid
import zipfile
###############################################################################
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
###############################################################################
class ETreeWrapper:
    """Convenience wrapper around xml trees.

    Calling an instance looks up the first element matching a tag and/or a
    set of attribute values; every other attribute access is forwarded to
    the wrapped ``lxml.etree.ElementTree``.
    """

    def __init__(self, *args, namespaces, **kwargs):
        """Wrap a new element tree.

        Args:
            *args: Positional arguments forwarded to
                ``lxml.etree.ElementTree``.
            namespaces: Mapping of prefix to namespace URI used to resolve
                prefixed tags in lookups.
            **kwargs: Keyword arguments forwarded to
                ``lxml.etree.ElementTree``.
        """
        self.tree = lxml.etree.ElementTree(*args, **kwargs)
        self.namespaces = namespaces

    def __call__(self, tag='*', **kwargs):
        """Return the first descendant matching ``tag`` and the attributes.

        Args:
            tag: Tag name (optionally prefixed) to search for; ``'*'``
                matches any element.
            **kwargs: Attribute name/value pairs the element must carry.

        Returns:
            Whatever ``tree.find`` returns for the generated path: the first
            matching element, or ``None`` when nothing matches.
        """
        path = './/{}'.format(tag)
        for key, value in kwargs.items():
            # Switch to single quotes when the value itself contains a
            # double quote, so the predicate stays well-formed.
            quote = "'" if '"' in str(value) else '"'
            path += '[@{0}={2}{1}{2}]'.format(key, value, quote)
        return self.tree.find(path, namespaces=self.namespaces)

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped tree.

        Raises:
            AttributeError: If ``tree`` itself is not set yet, or the
                wrapped tree has no such attribute.
        """
        # __getattr__ only runs when normal lookup failed. If the missing
        # name is 'tree' itself, reading self.tree below would re-enter this
        # method forever, so fail with the expected AttributeError instead.
        if name == 'tree':
            raise AttributeError(name)
        return getattr(self.tree, name)

    def write(self, path):
        """Serialize the tree to ``path`` as pretty-printed UTF-8 XML.

        Args:
            path: Destination file; converted with ``str`` before use.
        """
        self.tree.write(
            str(path),
            xml_declaration=True,
            encoding='UTF-8',
            pretty_print=True)
def template(name):
    """Load an XML template file and return it wrapped for easy lookups.

    Args:
        name: Path of the template file to open.

    Returns:
        An ``ETreeWrapper`` around the parsed document, configured with the
        ``opf``, ``dc``, ``xhtml`` and ``ncx`` namespace prefixes.
    """
    # Read raw bytes rather than text: the templates begin with an XML
    # declaration that names an encoding, and lxml refuses to parse a
    # ``str`` carrying such a declaration (it raises ValueError).
    with open(name, 'rb') as file:
        source = file.read()
    # Drop whitespace-only text nodes so pretty-printing works on output.
    parser = lxml.etree.XMLParser(remove_blank_text=True)
    return ETreeWrapper(
        lxml.etree.fromstring(source, parser),
        namespaces=dict(
            opf='http://www.idpf.org/2007/opf',
            dc='http://purl.org/dc/elements/1.1/',
            xhtml='http://www.w3.org/1999/xhtml',
            ncx='http://www.daisy.org/z3986/2005/ncx/'))
def flatten(tree):
    """Yield every node of ``tree`` depth-first, parents before children.

    ``tree`` is an iterable of nodes, each exposing a ``children`` iterable
    of further nodes.
    """
    # Stack of partially consumed iterators, innermost level on top.
    pending = [iter(tree)]
    while pending:
        for node in pending[-1]:
            yield node
            # Descend into this node's children before continuing with its
            # siblings.
            pending.append(iter(node.children))
            break
        else:
            # Current level exhausted: resume the parent level.
            pending.pop()
###############################################################################
# Node of the page tree: unique id, title and list of child pages.
Page = collections.namedtuple('Page', ['uid', 'title', 'children'])
# Image registered with a book: file name and media type.
Image = collections.namedtuple('Image', ['name', 'type'])
class Book:
    """Wrapper around an epub archive.

    Pages, images, the cover and the stylesheet are written into a temporary
    directory as they are added. ``save`` then generates the package
    document, container file and table of contents and zips the directory.

    Keyword arguments (all optional):
        title: Book title, defaults to ``'Untitled'``.
        language: Language code, defaults to ``'en'``.
        author: Creator name, defaults to ``'Unknown Author'``.
    """

    # File-name endings accepted by ``add_image`` (compared
    # case-insensitively) and the media type declared for each.
    _IMAGE_TYPES = (
        ('.jpg', 'image/jpeg'),
        ('.jpeg', 'image/jpeg'),
        ('.png', 'image/png'),
        ('.gif', 'image/gif'),
        ('.svg', 'image/svg+xml'),
    )

    def __init__(self, **kwargs):
        self.tempdir = tempfile.TemporaryDirectory()
        self.root = []
        self.images = []
        # Zero-padded sequential ids: '0001', '0002', ...
        self.uid_generator = map('{:04}'.format, itertools.count(1))
        # One identifier per book, so the package document and the NCX
        # agree and repeated saves describe the same publication.
        self.identifier = str(uuid.uuid4())
        self.path = pathlib.Path(self.tempdir.name).resolve()
        (self.path / 'pages').mkdir()
        (self.path / 'images').mkdir()
        self.title = kwargs.get('title', 'Untitled')
        self.language = kwargs.get('language', 'en')
        self.author = kwargs.get('author', 'Unknown Author')

    def add_page(self, title, content, parent=None):
        """Add a new page/chapter to the book.

        Args:
            title: Page title, used in the page itself and in the table of
                contents.
            content: HTML markup parsed and appended to the page body.
            parent: Page returned by an earlier call; when given, the new
                page becomes its child instead of a top-level page.

        Returns:
            The ``Page`` record of the new page.
        """
        log.info('New page: %s', title)
        page = Page(next(self.uid_generator), title, [])
        file = template('page.xhtml')
        file('xhtml:title').text = title
        file('xhtml:body').append(lxml.html.fromstring(content))
        file.write(self.path / 'pages' / (page.uid + '.xhtml'))
        # Register the page only after its file was written, so a failure
        # above cannot leave a tree entry pointing at a missing file.
        if parent:
            parent.children.append(page)
        else:
            self.root.append(page)
        return page

    def add_image(self, name, data):
        """Store an image in the book's ``images`` directory.

        Args:
            name: File name of the image; its ending selects the media type.
            data: Raw image bytes.

        Raises:
            ValueError: If ``name`` does not end in a supported extension.
        """
        lowered = name.lower()
        media_type = next(
            (kind for ending, kind in self._IMAGE_TYPES
             if lowered.endswith(ending)),
            None)
        if media_type is None:
            raise ValueError(
                'Unsupported image type: {!r}'.format(name))
        log.info('New image: %s', name)
        self.images.append(Image(name, media_type))
        with open(str(self.path / 'images' / name), 'wb') as file:
            file.write(data)

    def add_cover(self, data):
        """Write the raw bytes ``data`` as the book's ``cover.png``."""
        with open(str(self.path / 'cover.png'), 'wb') as file:
            file.write(data)

    def add_stylesheet(self, data):
        """Write the text ``data`` as the book's ``stylesheet.css``."""
        # Fixed encoding so the result does not depend on the platform's
        # default locale.
        with open(str(self.path / 'stylesheet.css'), 'w',
                  encoding='utf-8') as file:
            file.write(data)

    def save(self, filename):
        """Generate the metadata files and write the archive to ``filename``.

        May be called more than once on the same book.
        """
        self._write_spine()
        self._write_container()
        self._write_toc()
        mimetype = self.path / 'mimetype'
        with open(str(mimetype), 'w', encoding='ascii') as file:
            file.write('application/epub+zip')
        with zipfile.ZipFile(filename, 'w') as archive:
            # The mimetype entry goes in first and uncompressed.
            archive.write(
                str(mimetype), 'mimetype',
                compress_type=zipfile.ZIP_STORED)
            # 'mimetype' has no dot in its name, so this pattern does not
            # pick it up a second time. Sorted for a stable entry order.
            for file in sorted(self.path.rglob('*.*')):
                if not file.is_file():
                    continue
                archive.write(
                    str(file), file.relative_to(self.path).as_posix(),
                    compress_type=zipfile.ZIP_DEFLATED)
        log.info('Book saved: %s', self.title)

    def _write_spine(self):
        """Fill in the ``content.opf`` template and write it to the book."""
        spine = template('content.opf')
        # Explicit UTC designator: EPUB 3 requires dcterms:modified in the
        # form CCYY-MM-DDThh:mm:ssZ.
        now = arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss') + 'Z'
        spine(property='dcterms:modified').text = now
        spine('dc:date').text = now
        spine('dc:title').text = self.title
        spine('dc:creator').text = self.author
        spine('dc:language').text = self.language
        spine(id='uuid_id').text = self.identifier
        manifest = spine('opf:manifest')
        reading_order = spine('opf:spine')
        for page in flatten(self.root):
            lxml.etree.SubElement(
                manifest, 'item',
                href='pages/{}.xhtml'.format(page.uid), id=page.uid,
                **{'media-type': 'application/xhtml+xml'})
            lxml.etree.SubElement(
                reading_order, 'itemref', idref=page.uid)
        for number, image in enumerate(self.images, start=1):
            lxml.etree.SubElement(
                manifest,
                'item',
                href='images/' + image.name,
                id='img{:03}'.format(number),
                **{'media-type': image.type})
        spine.write(self.path / 'content.opf')

    def _write_container(self):
        """Write ``META-INF/container.xml`` from its template."""
        container = template('container.xml')
        meta_inf = self.path / 'META-INF'
        # The directory already exists when the book is saved again.
        meta_inf.mkdir(exist_ok=True)
        container.write(meta_inf / 'container.xml')

    def _write_toc(self):
        """Fill in the ``toc.ncx`` template and write it to the book."""
        toc = template('toc.ncx')
        # The NCX uid has to match the package's unique identifier.
        toc(name='dtb:uid').set('content', self.identifier)
        toc('ncx:text').text = self.title
        nav_map = toc('ncx:navMap')
        for page in self.root:
            self._page_to_toc(page, nav_map)
        toc.write(self.path / 'toc.ncx')

    def _page_to_toc(self, page, node):
        """Append a navPoint for ``page`` and its descendants under ``node``."""
        navpoint = lxml.etree.SubElement(
            node, 'navPoint', id=page.uid, playOrder=page.uid.lstrip('0'))
        navlabel = lxml.etree.SubElement(navpoint, 'navLabel')
        lxml.etree.SubElement(navlabel, 'text').text = page.title
        lxml.etree.SubElement(
            navpoint, 'content', src='pages/{}.xhtml'.format(page.uid))
        for child in page.children:
            self._page_to_toc(child, navpoint)
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" epub:prefix="z3998: http://www.daisy.org/z3998/2012/vocab/structure/#" lang="en" xml:lang="en">
<head>
<title/>
<link href="../stylesheet.css" rel="stylesheet" type="text/css"/>
</head>
<body/>
</html>
<?xml version='1.0' encoding='UTF-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
<head>
<meta content="" name="dtb:uid"/>
<meta content="0" name="dtb:depth"/>
<meta content="0" name="dtb:totalPageCount"/>
<meta content="0" name="dtb:maxPageNumber"/>
</head>
<docTitle>
<text/>
</docTitle>
<navMap/>
</ncx>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment