Skip to content

Instantly share code, notes, and snippets.

@venj
Created January 23, 2011 02:40
Show Gist options
  • Save venj/791758 to your computer and use it in GitHub Desktop.
Save venj/791758 to your computer and use it in GitHub Desktop.
Work-in-progress script used to create ePub XML files from a CHM .hhc file.
#!/usr/bin/env ruby
$KCODE = 'UTF8'
require 'fileutils'
require 'securerandom'
require 'rubygems'
require 'builder'
require 'hpricot'
# Disabled debugging helper: it sits inside an =begin/=end comment block, so it
# is never executed. It prints the 'Name' param value of every <li> directly
# under +ul+, prefixed with one tab per nesting +level+, and recurses into a
# directly nested <ul> with the level increased by one.
=begin
def print_tree(ul, level)
ul.search("/li").each do |li|
puts "\t" * level + li.search("object/param[@name='Name']").first.attributes["value"]
inner_ul = li.at("/ul")
unless inner_ul.nil?
inner_level = level + 1
print_tree(inner_ul, inner_level)
end
end
end
=end
# Command-line handling: exactly two arguments are required, the .hhc contents
# file and the ePub data directory. On any problem a message is printed and
# the script exits with status 1.
unless ARGV.size == 2
  puts "Usage: #{File.basename(__FILE__)} filename.hhc epub_data_dir"
  exit 1
end

begin
  # File.open (not Kernel#open) so that an argument starting with "|" is
  # treated as a path and never executed as a shell command.
  hhcfile = File.open(ARGV[0])
rescue SystemCallError => e
  # Only file-system errors (missing file, permissions, ...) are handled here;
  # SystemExit, signals and programming errors are no longer swallowed.
  puts "Cannot open \"#{ARGV[0]}\": #{e.message}"
  exit 1
end

# The argument count was verified above, so ARGV[1] is always present.
epub_data_dir = ARGV[1]
FileUtils.cd epub_data_dir do
# Common Variables
# Book metadata shared by toc.ncx and content.opf.
# The identifier comes from SecureRandom.uuid (lower-case, random) rather than
# the external `uuidgen` binary, which is not installed on every system.
uuid = SecureRandom.uuid
book_title = "Advanced Programming in the UNIX Environment, 2nd Edition"
language = "en-US"
rights = "Commercial"
isbn = "0-20-143307-9"
# Comma-separated list; split into one dc:subject element per entry later on.
subjects = "C, Unix, gcc, Linux"
author = "W. Richard Stevens, Stephen A. Rago"
publisher = "Addison Wesley Professional"
# Directory (relative to the ePub data dir) that holds the book content.
html_dir = "OEBPS"
# Cover image file name; its manifest-style id is derived from it later.
cover = "cover.png"
# toc.ncx
# Parse the .hhc contents file; its nested <ul>/<li> lists drive the navMap.
doc = Hpricot(hhcfile)

# Emits one <navPoint> for every <li> directly below +ul+ and recurses into a
# directly nested <ul>, numbering the points in the order they are visited.
#
# builder - Builder node that receives the navPoints (the navMap, or the
#           enclosing navPoint when recursing).
# ul      - <ul> element taken from the parsed .hhc document.
# counter - Hash whose :next entry is the playOrder of the next navPoint. The
#           same Hash is handed to every recursion level so numbering stays
#           sequential; a fresh counter starting at 1 is used when omitted.
#           It replaces the former top-level class variable, which raises
#           RuntimeError on Ruby 3.0+.
#
# NOTE(review): an <li> without a 'Name' or 'Local' param is not handled here.
def build_struct(builder, ul, counter = { :next => 1 })
  ul.search("/li").each do |li|
    name = li.search("object/param[@name='Name']").first.attributes["value"]
    src = li.search("object/param[@name='Local']").first.attributes["value"]
    # Make a navPoint for every entry, whether or not it has children.
    builder.navPoint(:id => File.basename(src).gsub(".", "_"), :playOrder => counter[:next].to_s) do |nav_point|
      nav_point.navLabel do |nav_label|
        nav_label.text name
      end
      # FIXME: only the last path component is kept, so folder structure in
      # the .hhc 'Local' values is not detected.
      nav_point.content(:src => src.split("/").last)
      # Advance the running number before descending so that children are
      # numbered after their parent.
      counter[:next] += 1
      inner_ul = li.at("/ul")
      build_struct(nav_point, inner_ul, counter) unless inner_ul.nil?
    end
  end
end

xml = Builder::XmlMarkup.new(:indent => 2)
xml.instruct!(:xml, :encoding => "UTF-8")
xml.ncx :xmlns => "http://www.daisy.org/z3986/2005/ncx/", :version => "2005-1" do |ncx|
  ncx.head do |head|
    head.meta :name => "dtb:uid", :content => uuid # Change later
    head.meta :name => "dtb:depth", :content => "-1" # Change later
    head.meta :name => "dtb:totalPageCount", :content => "0"
    head.meta :name => "dtb:maxPageNumber", :content => "0"
  end
  ncx.docTitle do |doc_title|
    doc_title.text book_title # Doc title
  end
  # Generate Table of Content
  ncx.navMap do |nav_map|
    build_struct(nav_map, doc.at("body/ul"))
  end
end

# Write to a file; the block form closes the handle even if writing fails.
File.open(File.join(html_dir, "toc.ncx"), "w") do |ncxfile|
  ncxfile.puts xml.target!
end
#content.opf
# Lookup table from file extension (lower-case, leading dot included) to the
# media type written into the manifest. Extensions sharing a media type are
# listed together.
mimetypes = {}
[
  ["application/x-dtbncx+xml", %w[.ncx]],
  ["application/xhtml+xml", %w[.html .xhtml .htm]],
  ["text/css", %w[.css]],
  ["image/jpeg", %w[.jpg .jpeg .jpe]],
  ["image/png", %w[.png]],
  ["image/gif", %w[.gif]],
  ["image/svg+xml", %w[.svg]]
].each do |media_type, extensions|
  extensions.each { |extension| mimetypes[extension] = media_type }
end
# Build content.opf: Dublin Core metadata, a manifest of every known-type file
# found under html_dir, and a spine that follows the .hhc contents order.

# Turns a file path into the id used for manifest items and spine itemrefs:
# '/', '.', '[' and ']' become '_' and the result is reversed. Manifest and
# spine share this helper so both sides always agree on the id.
item_id = lambda { |path| path.gsub(/[\/.\[\]]/, "_").reverse }

xml = Builder::XmlMarkup.new(:indent => 2)
xml.instruct!(:xml, :encoding => "UTF-8")
xml.package :version => "2.0", :xmlns => "http://www.idpf.org/2007/opf",
            :"unique-identifier" => "BookId" do |package|
  package.metadata :"xmlns:dc" => "http://purl.org/dc/elements/1.1/",
                   :"xmlns:opf" => "http://www.idpf.org/2007/opf" do |metadata|
    # tag! is Builder's documented way to emit element names that are not
    # valid Ruby method names (here: namespaced "dc:*" elements).
    metadata.tag! "dc:title", book_title
    metadata.tag! "dc:language", language
    metadata.tag! "dc:rights", rights
    metadata.tag! "dc:identifier", uuid, {:id => "BookId"}
    metadata.tag! "dc:identifier", isbn, {:"opf:scheme" => "ISBN"}
    subjects.split(",").each do |sub|
      metadata.tag! "dc:subject", sub.strip
    end
    metadata.tag! "dc:creator", author, {:"opf:role" => "aut"}
    metadata.tag! "dc:publisher", publisher
    # Cover "content" should be the same to the cover image id
    metadata.meta :name => "cover", :content => cover.gsub(".", "_")
  end
  package.manifest do |manifest|
    # toc.ncx is listed explicitly under the fixed id "ncx" (the spine's
    # :toc attribute points at it), so the directory scan below skips it.
    manifest.item :id => "ncx", :href => "toc.ncx", :"media-type" => mimetypes[".ncx"]
    FileUtils.cd(html_dir) do
      # Sorted so the manifest order does not depend on the file system.
      Dir["**/*"].sort.each do |filename|
        next unless File.file?(filename)
        ext = File.extname(filename).downcase # Downcase it to match the dict key
        # File.extname keeps the leading dot, so compare against ".opf" and
        # ".ncx"; without the dot toc.ncx was added to the manifest twice.
        next if ext == ".opf" || ext == ".ncx"
        media_type = mimetypes[ext]
        # Files without an extension or with an unknown type are left out.
        next if media_type.nil?
        # Generate the id from the file path to ensure uniqueness.
        manifest.item :id => item_id.call(filename), :href => filename, :"media-type" => media_type
      end
    end
  end
  package.spine :toc => "ncx" do |spine|
    listed = {}
    doc.search("li").each do |li|
      src = li.search("object/param[@name='Local']").first.attributes["value"]
      # Drop any "#fragment" and the directory part of the .hhc path.
      # FIXME: manifest ids are built from the path relative to html_dir, so
      # this idref only matches files that sit directly inside html_dir.
      idref = item_id.call(File.basename(src.sub(/#.*\z/, "")))
      # Several contents entries may point into the same file; reference each
      # file only once.
      next if listed[idref]
      listed[idref] = true
      spine.itemref :idref => idref
    end
  end
end
#puts xml.target!
# Write to a file; the block form closes the handle even if writing fails.
File.open(File.join(html_dir, "content.opf"), "w") do |opffile|
  opffile.puts xml.target!
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment