Skip to content

Instantly share code, notes, and snippets.

@caraya
Last active January 24, 2018 03:01
Show Gist options
  • Save caraya/6096612 to your computer and use it in GitHub Desktop.
Save caraya/6096612 to your computer and use it in GitHub Desktop.
Beginnings of a Python script to create basic opf content packages
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Requires the following libraries
# BeautifulSoup4
# Code based on: http://pydanny.com/generating-ncx-files-with-python.html
# Use os.path.exists() to test if a directory exists
# Use glob.glob('directory/path/*') to get the listing of a directory
import os
from xml.etree.ElementTree import Element, SubElement, Comment, tostring


class epub:
    """Helpers for building a basic EPUB 3 OPF package document.

    Both public helpers are static methods, so they can be called straight
    off the class: ``epub.generateList(path)`` / ``epub.createOPF(path)``.

    Elements are created with literal prefixed tag names (``dc:title``) and
    the namespace declarations are written as plain attributes. The
    in-memory tree is therefore not namespace-aware, but serialising it with
    ``xml.etree.ElementTree.tostring`` yields namespaced XML.
    """

    # File extension (lower-case, leading dot included, exactly as returned
    # by os.path.splitext) -> media type written to the manifest.
    MIMETYPES = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".xhtml": "application/xhtml+xml",
        ".ncx": "application/x-dtbncx+xml",
        ".css": "text/css",
        ".otf": "application/vnd.ms-opentype",  # OpenType fonts
        ".mp4": "video/mp4",  # MP4 video
        ".js": "text/javascript",
    }

    # Media type reported for extensions missing from MIMETYPES.
    DEFAULT_MIMETYPE = "application/octet-stream"

    @staticmethod
    def generateList(dir):
        """Return every file below *dir* paired with its media type.

        Args:
            dir: Directory to walk recursively.

        Returns:
            A list of ``(path, media_type)`` tuples. ``path`` is the
            directory path reported by ``os.walk`` joined with the file
            name. Extensions are matched case-insensitively against
            ``MIMETYPES``; unknown ones get ``DEFAULT_MIMETYPE``. The list
            is empty when ``os.walk`` yields nothing for *dir*.
        """
        result = []
        for dirpath, dirnames, filenames in os.walk(dir):
            # Sorting in place steers os.walk's top-down traversal, which
            # keeps the listing order stable across platforms.
            dirnames.sort()
            for filename in sorted(filenames):
                # splitext keeps the dot: the extension is ".txt", not "txt".
                extension = os.path.splitext(filename)[-1].lower()
                media_type = epub.MIMETYPES.get(
                    extension, epub.DEFAULT_MIMETYPE
                )
                result.append((os.path.join(dirpath, filename), media_type))
        return result

    @staticmethod
    def _add_element(parent, tag, text=None, attrib=None):
        """Append a ``tag`` child with optional text/attributes to *parent*."""
        element = SubElement(parent, tag, attrib or {})
        element.text = text
        return element

    @staticmethod
    def createOPF(dir):
        """Build the OPF ``package`` element for the content under *dir*.

        Args:
            dir: Directory holding the publication's files; each file found
                by ``generateList`` becomes one manifest ``item``.

        Returns:
            The root ``package`` Element, holding a ``metadata`` child
            filled with placeholder values and a ``manifest`` child.
        """
        add = epub._add_element

        # Defines root element and attributes
        root = Element("package")
        root.set("xmlns", "http://www.idpf.org/2007/opf")
        root.set("version", "3.0")
        root.set("xml:lang", "en")
        # Points at the id of the dc:identifier element created below.
        root.set("unique-identifier", "bookid")

        # The dc prefix is declared once here so that every dc:* child is
        # covered when the tree is serialised.
        metadata = SubElement(root, "metadata")
        metadata.set("xmlns:dc", "http://purl.org/dc/elements/1.1/")

        # dc:title and its refinement (the id is the target of "#title")
        add(metadata, "dc:title", "Title of my book", {"id": "title"})
        add(metadata, "meta", "main",
            {"refines": "#title", "property": "title-type"})

        # Information about copyright, plain string
        add(metadata, "dc:rights", "Enter your copyrights statement here")

        # Book identifier
        add(metadata, "dc:identifier", "Big ass book title", {"id": "bookid"})
        # What kind of identifier are we using
        add(metadata, "meta", "41",
            {"refines": "#bookid", "property": "identifier-type",
             "scheme": "xsd:string"})

        # dcterms:modified is the other part of the book ID
        # (ePub compliant date format)
        add(metadata, "meta", "2012-04-09T12:00:00Z",
            {"property": "dcterms:modified"})

        add(metadata, "dc:creator", "Enter Author Name Here",
            {"id": "creator"})
        # Refines dc:creator with file-as
        add(metadata, "meta", "Last Name, First Name",
            {"refines": "#creator", "property": "file-as"})
        # Refines dc:creator with role. For the 3-letter relator code, see
        # http://www.loc.gov/marc/relators/relacode.html
        add(metadata, "meta", "aut",
            {"refines": "#creator", "property": "role",
             "scheme": "marc:relators"})

        # Global language descriptor
        add(metadata, "dc:language", "en")
        # Publisher
        add(metadata, "dc:publisher", "Carlos Araya")
        # Book subject
        add(metadata, "dc:subject", "Book Publishing")

        manifest = SubElement(root, "manifest")
        entries = epub.generateList(dir)
        for index, (path, media_type) in enumerate(entries, start=1):
            # hrefs are written relative to *dir*, with forward slashes
            # regardless of the platform's path separator.
            href = os.path.relpath(path, dir).replace(os.sep, "/")
            SubElement(manifest, "item", {
                "id": "item%d" % index,
                "href": href,
                "media-type": media_type,
            })
        return root
# if __name__ == "__main__":
# createOPF(OEBPS)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment