Skip to content

Instantly share code, notes, and snippets.

@WillemJan
Created July 28, 2021 13:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save WillemJan/98aec77b9b0c5fed4ea322205891c49b to your computer and use it in GitHub Desktop.
Save WillemJan/98aec77b9b0c5fed4ea322205891c49b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# © Mikael Frykholm <mikael@frykholm.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import zipfile
from xml.etree import ElementTree
from lxml import html
import genshi.template
import os
import epub_meta
def _read_epub_member(archive, base_dir, href):
    """Read the archive member that *href* refers to.

    *href* is tried, in order, resolved against *base_dir* (the folder of the
    document that contained the reference), verbatim from the archive root,
    and below the conventional ``OEBPS`` folder.  Zip member names always use
    ``/`` as separator, hence ``posixpath`` rather than ``os.path``.

    Returns a ``(member_name, data)`` tuple, or ``(None, None)`` when *href*
    is empty or none of the candidates exists in the archive.
    """
    import posixpath

    if not href:
        return None, None
    candidates = (
        posixpath.normpath(posixpath.join(base_dir, href)),
        href,
        'OEBPS/' + href,
    )
    for member in candidates:
        try:
            return member, archive.read(member)
        except KeyError:
            # ZipFile.read raises KeyError for a name that is not in the archive.
            continue
    return None, None


def _manifest_cover_href(metadata, manifest, ns):
    """Return the manifest href of the item named by ``<meta name="cover">``, or None."""
    if metadata is None or manifest is None:
        return None
    cover_id = None
    for meta in metadata.findall('pkg:meta', namespaces=ns):
        if meta.get('name') == 'cover':
            cover_id = meta.get('content')
            break
    if cover_id is None:
        return None
    # Compare ids in Python rather than interpolating the id into an XPath
    # predicate, which breaks on ids containing quotes.
    for item in manifest.findall('pkg:item', namespaces=ns):
        if item.get('id') == cover_id:
            return item.get('href')
    return None


def _cover_from_guide(archive, tree, opf_dir):
    """Return cover bytes found through a ``reference`` element titled 'cover'.

    The referenced page is parsed with ``lxml.html`` and searched for an
    element whose ``alt`` attribute is ``'cover'``; that element's ``src`` is
    resolved relative to the page.  This is a best-effort lookup: pages that
    are missing or cannot be parsed are reported and skipped.  Returns None
    when no cover image could be read.
    """
    import posixpath

    for ref in tree.iter():
        if not isinstance(ref.tag, str) or not ref.tag.endswith('reference'):
            continue
        if ref.get('title') != 'cover':
            continue
        page_name, page_bytes = _read_epub_member(archive, opf_dir, ref.get('href'))
        if page_bytes is None:
            continue
        try:
            page = html.fromstring(page_bytes)
        except Exception as exc:
            # Parser errors come from lxml; report them instead of hiding them,
            # but keep going because this lookup is only a fallback.
            print('could not parse cover page %s: %s' % (page_name, exc))
            continue
        page_dir = posixpath.dirname(page_name)
        for node in page.iter():
            if node.get('alt') == 'cover':
                _, data = _read_epub_member(archive, page_dir, node.get('src'))
                if data is not None:
                    return data
    return None


def get_epub_info(fname):
    """Extract metadata and the cover image from the EPUB file *fname*.

    The package (OPF) document is located through ``META-INF/container.xml``.
    The cover is looked up, in order:

    1. via the ``<meta name="cover">`` entry and the matching manifest item;
    2. if no such entry exists, via a ``reference`` element titled 'cover';
    3. if still nothing was read, via ``epub_meta.get_epub_metadata``.

    Returns a dict with the keys ``filename`` (always the *fname* that was
    passed in), ``cover`` (bytes or None), ``title``, ``language``,
    ``creator``, ``date``, ``identifier``, ``description`` and ``uuid``
    (each the element text or None).

    Errors from opening the archive or parsing ``container.xml`` / the OPF
    document (for example a missing container file) are not caught here.
    """
    import posixpath

    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }

    # The context manager closes the archive even when parsing fails.
    with zipfile.ZipFile(fname) as archive:
        # find the contents metafile
        container = ElementTree.fromstring(archive.read('META-INF/container.xml'))
        cfname = container.find('n:rootfiles/n:rootfile', namespaces=ns).attrib['full-path']
        print('reading ' + cfname)

        # grab the metadata block from the contents metafile
        tree = ElementTree.fromstring(archive.read(cfname))
        p = tree.find('pkg:metadata', namespaces=ns)
        manifest = tree.find('pkg:manifest', namespaces=ns)

        # hrefs inside the OPF document are relative to the OPF's own folder.
        opf_dir = posixpath.dirname(cfname)

        href = _manifest_cover_href(p, manifest, ns)
        if href is None:
            print("NO COVER")
            cover = _cover_from_guide(archive, tree, opf_dir)
        else:
            _, cover = _read_epub_member(archive, opf_dir, href)
        if cover is not None:
            print(type(cover))

    if cover is None:
        # Last resort: let epub_meta try; it returns the image base64-encoded.
        import base64
        print(fname)
        metadata = epub_meta.get_epub_metadata(fname)
        encoded = metadata.get('cover_image_content')
        if encoded is not None:
            cover = base64.b64decode(encoded)
            print('yea!')

    # repackage the data
    res = {}
    # fname is never reassigned, so this is always the path of the EPUB itself.
    res['filename'] = fname
    res['cover'] = cover
    for name in ['title', 'language', 'creator', 'date', 'identifier', 'description']:
        node = p.find('dc:' + name, namespaces=ns) if p is not None else None
        res[name] = node.text if node is not None else None
    uuid_node = None
    if p is not None:
        uuid_node = p.find('dc:identifier[@pkg:scheme="uuid"]', namespaces=ns)
    res['uuid'] = uuid_node.text if uuid_node is not None else None
    return res
def generate_opds(books, output_dir, common_path):
    """Write cover images to *output_dir* and render the feed to ``opds.atom``.

    Each dict in *books* (as returned by ``get_epub_info``) is updated in
    place with two keys before rendering:

    * ``coverpath`` - the cover location used in the feed;
    * ``wwwpath``   - the book's path relative to *common_path*.

    The ``opds.xml`` Genshi template is loaded from the current directory and
    rendered with ``books`` as its only variable.  The result is written to
    ``opds.atom`` in the current directory.

    Returns the value of the feed file's ``write`` call (the number of
    characters written).
    """
    # makedirs also copes with nested output directories and an existing one.
    os.makedirs(output_dir, exist_ok=True)

    loader = genshi.template.TemplateLoader([os.curdir])
    tmpl = loader.load('opds.xml')

    for book in books:
        cpath = os.path.join(output_dir, '%s.jpg' % book['identifier'])
        # NOTE(review): the feed path is hard-coded to ".covers" while the file
        # is written below output_dir; they only agree when the caller passes
        # '.covers'.  Books whose identifier is None all map to "None.jpg".
        book['coverpath'] = ".covers/%s.jpg" % book['identifier']
        if book['cover'] and not os.path.exists(cpath):
            with open(cpath, 'wb') as cover_file:
                cover_file.write(book['cover'])
        book['wwwpath'] = os.path.relpath(book['filename'], common_path)

    # Render before opening the output so that a template error does not leave
    # a truncated, empty feed behind.
    feed = tmpl.generate(books=books).render()

    # UTF-8 is the default encoding of XML documents; do not depend on the
    # locale.  The context manager guarantees the feed is flushed and closed.
    with open('opds.atom', 'w', encoding='utf-8') as feed_file:
        return feed_file.write(feed)
def find_epubs(dir):
    """Yield the path of every file below *dir* whose name ends in ``.epub``.

    The directory tree is traversed with ``os.walk``; the extension check is
    case-insensitive.  Paths are yielded lazily, joined onto the folder in
    which each file was found.
    """
    for folder, _subfolders, names in os.walk(dir):
        yield from (
            os.path.join(folder, name)
            for name in names
            if name.lower().endswith('.epub')
        )
# Script entry point: scan the directory given as the first command-line
# argument for EPUB files and generate an OPDS feed for them.
if __name__ == "__main__":
    import sys

    # Without this check a missing argument surfaces as a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: %s EPUB_DIRECTORY" % sys.argv[0])
    library_dir = sys.argv[1]

    books = []
    for path in find_epubs(library_dir):
        print(path)
        books.append(get_epub_info(path))
    # Covers go to '.covers'; book links in the feed are made relative to the
    # scanned directory.
    generate_opds(books, '.covers', library_dir)
    print("Generated feed for %d books."%len(books))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment