Created
January 23, 2011 02:40
-
-
Save venj/791758 to your computer and use it in GitHub Desktop.
Work-in-progress script used to create ePub XML files from a CHM .hhc file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
$KCODE = 'UTF8' | |
require 'rubygems' | |
require 'hpricot' | |
require 'fileutils' | |
require 'builder' | |
=begin | |
def print_tree(ul, level) | |
ul.search("/li").each do |li| | |
puts "\t" * level + li.search("object/param[@name='Name']").first.attributes["value"] | |
inner_ul = li.at("/ul") | |
unless inner_ul.nil? | |
inner_level = level + 1 | |
print_tree(inner_ul, inner_level) | |
end | |
end | |
end | |
=end | |
# Validate the command line: exactly two arguments are required, the CHM
# table-of-contents file (.hhc) and the directory that holds the ePub data.
if ARGV.size != 2
  puts "Usage: #{File.basename(__FILE__)} filename.hhc epub_data_dir"
  exit 1
end

# Open the .hhc file for the parser further down.
# File.open is used rather than Kernel#open so that an argument starting with
# "|" is treated as a plain file name and never run as a command.
# Only SystemCallError (the Errno::* family) is rescued, so Interrupt and
# SystemExit are no longer swallowed, and the real reason is reported.
begin
  hhcfile = File.open(ARGV[0])
rescue SystemCallError => e
  puts "File: \"#{ARGV[0]}\" could not be opened: #{e.message}"
  exit 1
end

# ARGV[1] is always present here because of the size check above, so no
# fallback to the current directory is needed.
epub_data_dir = ARGV[1]
# Generate the two ePub metadata files from the parsed .hhc table of contents:
#   <html_dir>/toc.ncx      - navigation map
#   <html_dir>/content.opf  - package metadata, manifest and spine
# Everything runs with epub_data_dir as the working directory.
FileUtils.cd epub_data_dir do
  # Common Variables
  uuid = `uuidgen`.strip.downcase
  book_title = "Advanced Programming in the UNIX Environment, 2nd Edition"
  language = "en-US"
  rights = "Commercial"
  isbn = "0-20-143307-9"
  subjects = "C, Unix, gcc, Linux"
  author = "W. Richard Stevens, Stephen A. Rago"
  publisher = "Addison Wesley Professional"
  html_dir = "OEBPS"
  cover = "cover.png"

  # Single place that turns a file path into a manifest item id: "/", "." and
  # square brackets become "_" and the result is reversed. The manifest, the
  # spine and the cover meta all use it so their ids always agree.
  item_id = lambda { |path| path.gsub(/[\/.\[\]]/, "_").reverse }

  # Recursively emits one navPoint per <li> of +ul+ into +builder+.
  #
  # builder - Builder node that receives the navPoint elements.
  # ul      - Hpricot element whose direct <li> children are walked.
  # counter - one-element Array holding the next playOrder value. It is shared
  #           by all recursive calls. It replaces the former toplevel class
  #           variable (@@order), which Ruby 3.0+ rejects with a RuntimeError.
  def build_struct(builder, ul, counter = [1])
    ul.search("/li").each do |li|
      name_param = li.search("object/param[@name='Name']").first
      src_param = li.search("object/param[@name='Local']").first
      name = name_param && name_param.attributes["value"]
      src = src_param && src_param.attributes["value"]
      inner_ul = li.at("/ul")

      # An entry without a title or a target cannot become a navPoint; skip it
      # but keep its children by attaching them to the current parent.
      if name.nil? || src.nil?
        warn "Skipping TOC entry without a Name/Local value"
        build_struct(builder, inner_ul, counter) unless inner_ul.nil?
        next
      end

      # Make a navPoint anyway
      builder.navPoint(:id => File.basename(src).gsub(".", "_"), :playOrder => counter[0].to_s) do |nav_point|
        nav_point.navLabel do |nav_label|
          nav_label.text name
        end
        # FIXME: only the last path component is kept; folder structure is not detected.
        nav_point.content(:src => src.split("/").last)
        # Increment the navPoint number
        counter[0] += 1
        # Nested <ul> becomes nested navPoints
        build_struct(nav_point, inner_ul, counter) unless inner_ul.nil?
      end
    end
  end

  # toc.ncx
  doc = Hpricot(hhcfile)
  xml = Builder::XmlMarkup.new(:indent => 2)
  xml.instruct!(:xml, :encoding => "UTF-8")
  xml.ncx :xmlns => "http://www.daisy.org/z3986/2005/ncx/", :version => "2005-1" do |ncx|
    ncx.head do |head|
      head.meta :name => "dtb:uid", :content => uuid # Change later
      head.meta :name => "dtb:depth", :content => "-1" # Change later
      head.meta :name => "dtb:totalPageCount", :content => "0"
      head.meta :name => "dtb:maxPageNumber", :content => "0"
    end
    ncx.docTitle do |doc_title|
      doc_title.text book_title # Doc title
    end
    # Generate Table of Content
    ncx.navMap do |nav_map|
      build_struct(nav_map, doc.at("body/ul"))
    end
  end

  # Write to a file; the block form closes it even if writing raises.
  File.open(File.join(html_dir, "toc.ncx"), "w") do |ncxfile|
    ncxfile.puts xml.target!
  end

  # content.opf
  mimetypes = {
    ".ncx" => "application/x-dtbncx+xml",
    ".html" => "application/xhtml+xml",
    ".xhtml" => "application/xhtml+xml",
    ".htm" => "application/xhtml+xml",
    ".css" => "text/css",
    ".jpg" => "image/jpeg",
    ".jpeg" => "image/jpeg",
    ".jpe" => "image/jpeg",
    ".png" => "image/png",
    ".gif" => "image/gif",
    ".svg" => "image/svg+xml"
  }
  xml = Builder::XmlMarkup.new(:indent => 2)
  xml.instruct!(:xml, :encoding => "UTF-8")
  xml.package :version => "2.0", :xmlns => "http://www.idpf.org/2007/opf",
              :"unique-identifier" => "BookId" do |package|
    package.metadata :"xmlns:dc" => "http://purl.org/dc/elements/1.1/",
                     :"xmlns:opf" => "http://www.idpf.org/2007/opf" do |metadata|
      metadata.__send__ "dc:title", book_title
      metadata.__send__ "dc:language", language
      metadata.__send__ "dc:rights", rights
      metadata.__send__ "dc:identifier", uuid, {:id => "BookId"}
      metadata.__send__ "dc:identifier", isbn, {:"opf:scheme" => "ISBN"}
      subjects.split(",").each do |sub|
        metadata.__send__ "dc:subject", sub.strip
      end
      metadata.__send__ "dc:creator", author, {:"opf:role" => "aut"}
      metadata.__send__ "dc:publisher", publisher
      # Cover "content" must equal the cover image's manifest id, so it is
      # built with the same helper the manifest uses.
      metadata.meta :name => "cover", :content => item_id.call(cover)
    end

    package.manifest do |manifest|
      # toc.ncx is listed explicitly here and therefore excluded from the scan below.
      manifest.item :id => "ncx", :href => "toc.ncx", :"media-type" => mimetypes[".ncx"]
      FileUtils.cd(html_dir) do
        Dir["**/*"].sort.each do |filename|
          next unless File.file?(filename)
          # Downcase it to match the dict key. File.extname keeps the leading
          # dot, so the comparisons below must include it as well.
          ext = File.extname(filename).downcase
          next if ext == ".opf" || ext == ".ncx"
          media_type = mimetypes[ext]
          next if media_type.nil? # no or unknown extension
          # Generate the id from the relative file path to ensure uniqueness
          manifest.item :id => item_id.call(filename), :href => filename, :"media-type" => media_type
        end
      end
    end

    package.spine :toc => "ncx" do |spine|
      seen = {}
      doc.search("li").each do |li|
        local = li.search("object/param[@name='Local']").first
        next if local.nil? # entry without a target file
        # Drop any "#fragment": the spine refers to whole files.
        src = local.attributes["value"].to_s.split("#").first
        next if src.nil? || src.empty?
        # The hhc path may be absolute, so only the basename is used.
        # FIXME: files in sub-folders of html_dir get path-based manifest ids
        # and will not be matched by this basename-based idref.
        idref = item_id.call(File.basename(src))
        # Several TOC entries may point into the same file; reference it once.
        next if seen[idref]
        seen[idref] = true
        spine.itemref :idref => idref
      end
    end
  end

  # Write to a file; the block form closes it even if writing raises.
  File.open(File.join(html_dir, "content.opf"), "w") do |opffile|
    opffile.puts xml.target!
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment