Last active
January 24, 2018 03:01
-
-
Save caraya/6096612 to your computer and use it in GitHub Desktop.
Beginnings of a Python script to create basic opf content packages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Requires the following libraries
# BeautifulSoup4
# Code based on: http://pydanny.com/generating-ncx-files-with-python.html
# Use os.path.exists() to test if a directory exists
# Use glob.glob('directory/path/*') to get the listing of a directory
class epub:
    """Helpers for building a basic EPUB 3 OPF package document.

    Both helpers are static methods, so they are called on the class
    itself (``epub.generateList(path)``, ``epub.createOPF(path)``); no
    instance state is involved.
    """

    # Kept as class attributes for backward compatibility (``epub.os``,
    # ``epub.Element`` ...).  Names bound in a class body are NOT visible
    # as bare names inside the methods below, so every method imports
    # what it needs locally.
    from xml.etree.ElementTree import Element, SubElement, Comment, tostring
    import os

    @staticmethod
    def generateList(dir):
        """Return every file below *dir* paired with its media type.

        Args:
            dir: Root directory of the publication content
                (for example ``"OEBPS"``).

        Returns:
            A list of ``(path, media_type)`` tuples, one per file found.
            ``path`` is ``dir`` joined with the file's relative location.
            Directories and file names are visited in sorted order so
            the result is reproducible.  Files whose extension is not in
            the table below get ``"application/octet-stream"``.  A
            missing directory yields an empty list (``os.walk`` ignores
            it silently).
        """
        import os

        # Keys include the leading dot because os.path.splitext() keeps
        # it; keys are lower case because the extension is lower-cased
        # before the lookup.
        media_types = {
            ".jpg": "image/jpeg",  # "image/jpg" is not a registered type
            ".jpeg": "image/jpeg",
            ".png": "image/png",
            ".xhtml": "application/xhtml+xml",
            ".ncx": "application/x-dtbncx+xml",
            ".css": "text/css",
            ".otf": "application/vnd.ms-opentype",  # OpenType fonts
            ".mp4": "video/mp4",  # MP4 video
            ".js": "text/javascript",
        }
        fallback = "application/octet-stream"

        result = []
        for dirpath, dirnames, filenames in os.walk(dir):
            # Sorting in place also fixes the order in which os.walk
            # descends into the sub-directories.
            dirnames.sort()
            for filename in sorted(filenames):
                extension = os.path.splitext(filename)[1].lower()
                result.append((
                    os.path.join(dirpath, filename),
                    media_types.get(extension, fallback),
                ))
        return result

    @staticmethod
    def createOPF(dir):
        """Build the OPF ``<package>`` element for the content in *dir*.

        The metadata values are the placeholder strings hard-coded
        below; the manifest gets one ``<item>`` per file reported by
        ``generateList(dir)``.  No ``<spine>`` is generated and nothing
        is written to disk.

        Args:
            dir: Root directory of the publication content.  Manifest
                ``href`` values are expressed relative to it, using
                forward slashes.

        Returns:
            The root ``xml.etree.ElementTree.Element``; serialise it
            with ``xml.etree.ElementTree.tostring`` when needed.
        """
        import os
        from xml.etree.ElementTree import Element, SubElement

        def add(parent, tag, text=None, attrib=None):
            """Append a child element and optionally set its text."""
            child = SubElement(parent, tag, attrib or {})
            if text is not None:
                child.text = text
            return child

        # Root element and attributes.  Prefixed names such as
        # "xml:lang" and "dc:title" are used literally; ElementTree
        # writes names without a "{uri}" part unchanged.
        root = Element("package")
        root.set("xmlns", "http://www.idpf.org/2007/opf")
        root.set("version", "3.0")
        root.set("xml:lang", "en")
        # Must name the id of the dc:identifier element created below.
        root.set("unique-identifier", "bookid")

        # Declare the dc prefix once, on the wrapper that holds every
        # dc:* element, so all of them are namespace-well-formed.
        metadata = add(root, "metadata", attrib={
            "xmlns:dc": "http://purl.org/dc/elements/1.1/",
        })

        # dc:title; the id is the target of the refinement that follows.
        add(metadata, "dc:title", "Title of my book", {"id": "title"})
        add(metadata, "meta", "main", {
            "refines": "#title",
            "property": "title-type",
        })

        # Information about copyright, plain string
        add(metadata, "dc:rights", "Enter your copyrights statement here")

        # Book identifier, then what kind of identifier it is
        add(metadata, "dc:identifier", "Big ass book title", {"id": "bookid"})
        add(metadata, "meta", "41", {
            "refines": "#bookid",
            "property": "identifier-type",
            "scheme": "xsd:string",
        })

        # dcterms:modified is the other part of the book ID
        # (ePub compliant date format).
        add(metadata, "meta", "2012-04-09T12:00:00Z", {
            "property": "dcterms:modified",
        })

        # Creator, refined with file-as and with role
        add(metadata, "dc:creator", "Enter Author Name Here", {"id": "creator"})
        add(metadata, "meta", "Last Name, First Name", {
            "refines": "#creator",
            "property": "file-as",
        })
        # For the 3-letter relator code, see
        # http://www.loc.gov/marc/relators/relacode.html
        add(metadata, "meta", "aut", {
            "refines": "#creator",
            "property": "role",
            "scheme": "marc:relators",
        })

        # Global language descriptor
        add(metadata, "dc:language", "en")
        # Publisher
        add(metadata, "dc:publisher", "Carlos Araya")
        # Book subject
        add(metadata, "dc:subject", "Book Publishing")

        # Manifest: one item per file found below dir.
        manifest = add(root, "manifest")
        for index, (path, media_type) in enumerate(epub.generateList(dir), 1):
            href = os.path.relpath(path, dir).replace(os.sep, "/")
            add(manifest, "item", attrib={
                "id": "item{}".format(index),
                "href": href,
                "media-type": media_type,
            })

        return root
# if __name__ == "__main__":
#     createOPF(OEBPS)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment