-
-
Save imudak/788127 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Rakefile for self-publishing a Kindle 3 ebook made by scanning a paper book.
#
# Modify these parameters for your environment:
#   SRC (required): source PDF file name.
#   TOP, BOTTOM, LEFT, RIGHT: default trimming margins (pixels).
#   SIZE: image size adjusted for the destination format.
#   LEVEL (optional): -level option of ImageMagick.
#
# Debian or Ubuntu users need the packages below:
#   poppler-utils poppler-data imagemagick pdftk
#
# Source PDF: the SRC environment variable, or else the first *.pdf that `ls`
# lists in the current directory.
SRC = (ENV['SRC'] || `ls *.pdf|head -n 1|tr -d "\n"`)

# Trim options passed to ImageMagick by every profile except "test".
AUTO_TRIM = '-fuzz 50% -trim'

# LEVEL and SIZE of the fixed profiles. Their margins are always 0 and they
# ignore the TOP/BOTTOM/LEFT/RIGHT/LEVEL/SIZE environment variables.
FIXED_PROFILES = {
  'bunko'     => { level: '0%,100%,0.3', size: 'x735' }, # small books, portrait
  'comic'     => { level: '0%,100%,0.5', size: 'x735' }, # small books, portrait
  'landscape' => { level: '0%,100%,0.5', size: '722' },  # large books, landscape
  'slide'     => { level: '0%,100%,0.3', size: 'x735' }, # small books, portrait
  'default'   => { level: '0%,100%,0.5', size: 'x735' }  # small books, portrait
}.freeze

# Resolves the conversion settings for a profile name.
#
# type - profile name. "custom" and "test" read their values from env;
#        any name that is not a known profile falls back to "default".
# env  - Hash-like source of overrides (ENV by default).
#
# Returns a Hash with the keys :type, :top, :bottom, :left, :right, :level,
# :size and :trim. Values taken from env stay Strings, exactly as supplied.
def conversion_settings( type, env = ENV )
  if %w[custom test].include?( type )
    {
      type:   type,
      top:    env['TOP'] || 0,
      bottom: env['BOTTOM'] || 0,
      left:   env['LEFT'] || 0,
      right:  env['RIGHT'] || 0,
      level:  env['LEVEL'] || '0%,100%,0.3',
      size:   env['SIZE'] || 'x735',
      # "test" turns automatic trimming off so that margins can be tuned by hand.
      trim:   type == 'test' ? '' : AUTO_TRIM
    }
  else
    name = FIXED_PROFILES.key?( type ) ? type : 'default'
    FIXED_PROFILES[name].merge( type: name, top: 0, bottom: 0, left: 0, right: 0, trim: AUTO_TRIM )
  end
end

# Each constant is assigned exactly once; an unknown TYPE resolves to
# "default" without re-assigning an already initialized constant.
SETTINGS = conversion_settings( ENV['TYPE'] || 'bunko' ).freeze
TYPE   = SETTINGS[:type]
TRIM   = SETTINGS[:trim]
TOP    = SETTINGS[:top]
BOTTOM = SETTINGS[:bottom]
LEFT   = SETTINGS[:left]
RIGHT  = SETTINGS[:right]
LEVEL  = SETTINGS[:level]
SIZE   = SETTINGS[:size]
# original gist memo. | |
# SIZE = 'x693' # for generating mobi, portrait style only | |
#LEVEL = '0%,100%' | |
#--------------------------------------------------------- | |
# Working directories. `directory` registers a rake task that creates each one.
PGM_DIR = './pgm'
PNG_DIR = './png'
PDF_DIR = './pdf'
[PGM_DIR, PNG_DIR, PDF_DIR].each { |dir| directory dir }

# Output and intermediate file names, derived from the source PDF name.
DST  = SRC.sub( /\.pdf$/, ".#{TYPE}.pdf" )
MOBI = SRC.sub( /\.pdf$/, '.mobi' )
OPF  = SRC.sub( /\.pdf$/, '.opf' )
HTML = SRC.sub( /\.pdf$/, '.html' )
LOG  = SRC.sub( /\.pdf$/, '.log' )
CONVINFO = './convdata.txt'

# Command line that reproduces this run with the resolved settings.
CMD = "SRC=#{SRC} TYPE=#{TYPE} TOP=#{TOP} BOTTOM=#{BOTTOM} LEFT=#{LEFT} RIGHT=#{RIGHT} LEVEL=#{LEVEL} SIZE=#{SIZE} rake"

# Append a timestamp (same layout as `LANG=C date`) and the command to the log.
# Written directly from Ruby rather than through `sh "echo ... >> file"`, so
# file names containing spaces or shell metacharacters are handled safely.
File.open( LOG, 'a' ) do |log|
  log.puts Time.now.strftime( '%a %b %e %H:%M:%S %Z %Y' )
  log.puts CMD
end

# Info key/value pair that is concatenated after metadata.txt and passed to
# `pdftk update_info` when the destination PDF is built.
File.write( CONVINFO, "InfoKey: ConvInfoCmd\nInfoValue: #{CMD}\n" )
# Runs `pdfinfo` on SRC and returns its report as UTF-8 text.
# The command is passed as an argv array, so no shell is involved and SRC may
# contain spaces or shell metacharacters. (Kernel#open with a leading "|" is
# deprecated since Ruby 3.3.)
# Raises Errno::ENOENT when pdfinfo is not installed.
def pdfinfo_report
  IO.popen( ['pdfinfo', SRC], 'r:utf-8', &:read )
end

# Number of pages of SRC; 0 when pdfinfo reports no "Pages:" line.
def count_pages
  pdfinfo_report[/^Pages:\s*(\d+)/, 1].to_i
end

# Title recorded in SRC, or nil when pdfinfo reports no "Title:" line.
def book_title
  pdfinfo_report[/^Title:\s*(.+)$/, 1]
end

# Author recorded in SRC, or nil when pdfinfo reports no "Author:" line.
def book_author
  pdfinfo_report[/^Author:\s*(.+)$/, 1]
end
# Builds the list of page image paths that pdftoppm produces for the prefix
# "<dir>/tmp".
#
# dir   - directory holding the images.
# ext   - file extension without the dot ('pgm', 'png', ...).
# count - total number of pages.
#
# pdftoppm zero-pads the page number to the number of digits of the page
# count, so the width is derived from count instead of being fixed to 2 or 3
# digits (which named the files wrongly for books with fewer than 10 or more
# than 999 pages).
#
# Returns an Array of "<dir>/tmp-<page>.<ext>" for pages 1..count.
def image_list( dir, ext, count )
  digits = count.to_s.length
  (1..count).map do |page|
    "#{dir}/tmp-#{page.to_s.rjust( digits, '0' )}.#{ext}"
  end
end
# Returns the pixel width that `identify -verbose` reports in the
# "Page geometry" line of the given image, or 0 when no such line is found.
#
# The command is passed as an argv array, so the path never goes through a
# shell. (Kernel#open with a leading "|" is deprecated since Ruby 3.3.)
# Raises Errno::ENOENT when identify is not installed.
def width( png )
  report = IO.popen( ['identify', '-verbose', png], 'r:utf-8', &:read )
  report[/Page geometry:\s+(\d+)x.*/, 1].to_i
end
# Replaces the given png with a 1x1 white image when its reported width
# exceeds 10000 pixels; otherwise leaves the file alone.
def checkpng( png )
  return unless width( png ) > 10000

  sh "convert -size 1x1 xc:white #{png}"
end
# Converts one extracted page image (pgm) into the png used for the book.
#
# ImageMagick applies the steps in this order: level adjustment (LEVEL), chop
# LEFT/TOP from the top-left corner, chop RIGHT/BOTTOM from the bottom-right
# corner, the TRIM options (empty for the "test" profile), then resize to
# SIZE. The result is validated by checkpng.
#
# Note: an earlier variant, kept in the original only as commented-out code,
# captured convert's output (2>&1) and wrote a 1x1 white png whenever convert
# printed anything.
def pgm2png( pgm, png )
  steps = [
    "-level '#{LEVEL}'",
    "-chop #{LEFT}x#{TOP}",
    "-gravity SouthEast -chop #{RIGHT}x#{BOTTOM}",
    "-gravity NorthWest #{TRIM} -resize #{SIZE}"
  ]
  sh "convert #{pgm} #{steps.join( ' ' )} #{png}"
  checkpng( png )
end
# Expected intermediate files: one pgm and one png per page of the source PDF.
pages = count_pages
PGMS = image_list( PGM_DIR, 'pgm', pages )
PNGS = image_list( PNG_DIR, 'png', pages )

# Register one file task per page image.
PNGS.zip( PGMS ).each do |png, pgm|
  # A png is built from the pgm of the same page (prerequisite index 1).
  file png => [PNG_DIR, pgm] do |t|
    pgm2png( t.prerequisites[1], t.name )
  end

  # pdftoppm extracts every page in one run, so it is started only while the
  # last pgm is still missing.
  file pgm => [PGM_DIR, SRC] do
    sh "pdftoppm -r 300 -gray #{SRC} #{PGM_DIR}/tmp" unless File.exist?( PGMS.last )
  end
end
task :default => :pdf

desc 'generate pdf file by concat all png files.'
task :pdf => DST

# Builds the destination PDF: the pngs are converted to partial PDFs in
# batches of 50, the parts are concatenated with pdftk, and the metadata of
# the source PDF plus the conversion info is written into the result.
file DST => [PDF_DIR, 'metadata.txt'] + PNGS do
  pdf_list = []
  # each_slice never yields an empty batch. The former `PNGS[i, 50]` loop
  # returned [] (truthy) when the page count was an exact multiple of 50 and
  # then ran `convert` without any input file.
  PNGS.each_slice( 50 ).with_index do |src_pngs, batch|
    part = "#{PDF_DIR}/#{batch * 50}.pdf" # parts are named 0.pdf, 50.pdf, 100.pdf, ...
    sh "convert #{src_pngs.join ' '} -quality 50 #{part}"
    pdf_list << part
  end
  sh "pdftk #{pdf_list.join ' '} cat output #{PDF_DIR}/#{DST}"
  sh "cat metadata.txt #{CONVINFO} > updateinfo.txt"
  sh "pdftk #{PDF_DIR}/#{DST} update_info updateinfo.txt output #{DST}"
  sh "rm updateinfo.txt"
end
desc 'generate metadata file from source pdf.'
task metadata: 'metadata.txt'

# Dumps the metadata of the source PDF into ./metadata.txt with pdftk.
file 'metadata.txt' => SRC do |t|
  sources = t.prerequisites.join( ' ' )
  sh "pdftk #{sources} dump_data output ./#{t.name}"
end
desc 'crop pgm files to png files.'
task png: [PNG_DIR, *PNGS]

# Generic rule: any .png can be produced from the .pgm with the same base name.
rule '.png' => '.pgm' do |t|
  source_pgm = t.prerequisites.first
  pgm2png( source_pgm, t.name )
end
# Depends on the pgm directory, the source PDF and every per-page pgm file.
desc 'extract image files from source pdf.'
task pgm: [PGM_DIR, SRC, *PGMS]
desc 'cleanup pgm images.'
task 'clean-pgm' do
  # rm_f skips files that are already gone and keeps deleting the rest; plain
  # `rm` stopped at the first missing file and the bare rescue hid that.
  rm_f PGMS
end
desc 'cleanup png images.'
task 'clean-png' do
  # rm_f skips files that are already gone and keeps deleting the rest; plain
  # `rm` stopped at the first missing file and the bare rescue hid that.
  rm_f PNGS
end
desc 'cleanup temporary pdf files.'
task 'clean-pdf' do
  # Removes every partial and intermediate PDF below PDF_DIR; rm_f keeps going
  # if one of the globbed files has disappeared in the meantime.
  rm_f FileList["#{PDF_DIR}/*.pdf"]
end
desc 'cleanup all tmp files.'
task :clean => ['clean-png', 'clean-pgm', 'clean-pdf'] do
  # HTML and OPF exist only after a mobi build and metadata.txt only after a
  # pdf build; rm_f ignores whichever of them is missing instead of aborting.
  rm_f ['metadata.txt', HTML, OPF]

  # Remove only the working directories that exist; rmdir raises on a missing
  # directory. A directory that still contains files is left to raise.
  [PGM_DIR, PNG_DIR, PDF_DIR].each do |dir|
    rmdir dir if File.directory?( dir )
  end
end
# Needs the OPF package file, the HTML contents file and every page png.
desc 'generate MOBI file.'
task :mobi => [OPF, HTML, *PNGS] do
  sh "kindlegen #{OPF} -unicode -o #{MOBI}"
end
# Writes the OPF package file that kindlegen reads. Title and author come
# from the source PDF, the first page png is the cover and the generated HTML
# file is the only manifest item.
rule '.opf' => '.pdf' do |t|
  # Escape everything inserted into the XML: a title such as "A & B" would
  # otherwise produce a malformed package file. `xml: :attr` also adds the
  # surrounding double quotes.
  title  = book_title.to_s.encode( xml: :text )
  author = book_author.to_s.encode( xml: :text )
  cover  = PNGS[0].to_s.encode( xml: :text )
  href   = HTML.encode( xml: :attr )

  # The gsub removes the 4 spaces of source indentation, so the XML
  # declaration starts at the very beginning of the file.
  opf = <<-OPF.gsub( /^ {4}/, '' )
    <?xml version="1.0" encoding="utf-8"?>
    <package unique-identifier="uid">
      <metadata>
        <dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core"
        xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/">
          <dc:Title>#{title}</dc:Title>
          <dc:Language>en-US</dc:Language>
          <dc:Creator>#{author}</dc:Creator>
          <dc:Date>#{Time::now.strftime '%m/%d/%Y'}</dc:Date>
        </dc-metadata>
        <x-metadata>
          <output encoding="utf-8" content-type="text/x-oeb1-document"></output>
          <EmbeddedCover>#{cover}</EmbeddedCover>
        </x-metadata>
      </metadata>
      <manifest>
        <item id="contents" media-type="text/html" href=#{href}></item>
      </manifest>
      <spine>
        <itemref idref="contents" />
      </spine>
      <tours></tours>
      <guide>
        <reference type="start" title="contents" href=#{href}></reference>
      </guide>
    </package>
  OPF
  File.open( t.name, 'w:utf-8' ) { |f| f.write opf }
end
# Writes the HTML contents file for kindlegen: one <img> per page png, with a
# Mobipocket page break between consecutive pages.
rule '.html' => '.pdf' do |t|
  # Escape the title and the image paths so that characters such as "&" or
  # "<" cannot break the markup. `xml: :attr` also adds the surrounding
  # double quotes.
  title  = book_title.to_s.encode( xml: :text )
  images = PNGS.map { |png| %Q|<img style="height: 100%;" src=#{png.encode( xml: :attr )} />| }
  # 8 spaces after the newline: the gsub below strips 4 from every line,
  # including the interpolated ones, leaving the images indented by 4.
  pages  = images.join( "<mbp:pagebreak />\n" + ' ' * 8 )

  html = <<-HTML.gsub( /^ {4}/, '' )
    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
    <html lang="ja-JP">
      <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <title>#{title}</title>
      </head>
      <body style="text-align: right;">
        #{pages}
      </body>
    </html>
  HTML
  File.open( t.name, 'w:utf-8' ) { |f| f.write html }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment