Skip to content

Instantly share code, notes, and snippets.

@sck
Created June 24, 2012 11:23
Show Gist options
  • Save sck/2982867 to your computer and use it in GitHub Desktop.
Save sck/2982867 to your computer and use it in GitHub Desktop.
Use ScanSnap's 'Scan to Searchable PDF' even with PDFs not created by ScanSnap
#! /usr/bin/env ruby
require 'tmpdir'
# Tells whether +fn+ already names a ".searchable.pdf" output file.
#
# The pattern is anchored to the end of the string so that only the file's
# own suffix counts; a ".searchable.pdf" fragment elsewhere in the path
# (for example in a directory name) does not match.
#
# @param fn [String] path to test
# @return [Integer, nil] index of the suffix when present (truthy), else nil
def fixed?(fn)
  fn =~ /\.searchable\.pdf\z/
end
# Derives the path of the ".searchable.pdf" companion for +fn+.
#
# A path that fixed? already recognises is returned unchanged. Otherwise the
# extension is replaced by ".searchable.pdf" in the same directory. The parts
# are combined with File.join so that a file in the root directory does not
# end up with a doubled leading slash.
#
# @param fn [String] path of the input PDF
# @return [String] path of the searchable variant
def searchable_fn(fn)
  return fn if fixed?(fn)

  stem = File.basename(fn, File.extname(fn))
  File.join(File.dirname(fn), "#{stem}.searchable.pdf")
end
# Tells whether a ".searchable.pdf" companion of +fn+ already exists on disk.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2 and would
# raise NoMethodError there.
#
# @param fn [String] path of the input PDF
# @return [Boolean] false when +fn+ is itself the searchable variant,
#   otherwise whether the file named by searchable_fn(fn) exists
def have_fix?(fn)
  return false if fixed?(fn)

  File.exist?(searchable_fn(fn))
end
# $fn holds the path of the PDF being processed; it starts as the first
# command-line argument.
$fn = ARGV.shift
# Without an argument every later step would fail on a nil path, so stop
# here with a usage message (abort writes to stderr and exits with status 1).
abort "Usage: #{File.basename($0)} FILE.pdf" if $fn.nil?
# Prefer an already existing ".searchable.pdf" companion over the original.
$fn = searchable_fn($fn) if have_fix?($fn)
# Tells whether the PDF at $fn lacks an "ABBYY FineReader" Creator entry.
#
# Runs `pdfinfo` on $fn and inspects the first output line that starts with
# "Creator:". The command is spawned from an argument array, so the file name
# is passed verbatim and never parsed by a shell (String#inspect is not shell
# quoting: "$", backticks and backslashes would still be interpreted).
#
# @return [Boolean] true when there is no Creator line or it does not mention
#   "ABBYY FineReader"
# @raise [Errno::ENOENT] if the pdfinfo executable cannot be found
def needs_creator_fixed?
  report = IO.popen(["pdfinfo", $fn], &:read)
  # Anchored so that e.g. a Title line containing "Creator" is not picked up.
  creator_line = report.split("\n").grep(/\ACreator:/).first
  !creator_line.to_s.include?("ABBYY FineReader")
end
# Writes a copy of the PDF at $fn whose Creator info field is set to
# "ABBYY FineReader for ScanSnap (Mac edition)", then points $fn at the copy.
#
# A pdftk "update_info" description is written to a temporary directory and
# applied with `pdftk`; the result goes to searchable_fn($fn). pdftk is
# started with separate arguments, so the paths are passed verbatim and never
# parsed by a shell. The exit status of pdftk is not checked here; $fn is
# updated regardless.
#
# @return [String] the new value of $fn
def fix_creator
  log "Fixing creator"
  Dir.mktmpdir do |d|
    pdftk_desc = File.join(d, "pdftk_desc.txt")
    File.open(pdftk_desc, "w") do |w|
      w.puts "InfoKey: Creator"
      w.puts "InfoValue: ABBYY FineReader for ScanSnap (Mac edition)"
    end
    nfn = searchable_fn($fn)
    system("pdftk", $fn, "update_info", pdftk_desc, "output", nfn)
    $fn = nfn
  end
end
# Tells whether `pdftotext` extracts any word characters from the PDF at $fn.
#
# pdftotext is asked to write to standard output ("-") and is spawned from an
# argument array, so the file name is passed verbatim and never parsed by a
# shell.
#
# @return [Integer, nil] index of the first word character in the extracted
#   text (truthy), or nil when there is none
# @raise [Errno::ENOENT] if the pdftotext executable cannot be found
def does_pdf_file_contain_text?
  text = IO.popen(["pdftotext", $fn, "-"], &:read)
  text =~ /\w+/
end
# Prints +m+ to standard output, prefixed with the script's file name in
# square brackets.
#
# The prefix is File.basename($0), so a script started as "dir/tool.rb" logs
# as "[tool.rb]" rather than only the trailing run of word characters.
#
# @param m [#to_s] message to print
# @return [nil]
def log(m)
  puts "[#{File.basename($0)}] #{m}"
end
# Opens the PDF at $fn with the "Scan to Searchable PDF" application.
#
# `open` is started with separate arguments, so the application name and the
# file path are passed verbatim and never parsed by a shell.
#
# @return [Boolean, nil] result of Kernel#system: true on exit status 0,
#   false on a non-zero status, nil if `open` could not be executed
def make_searchable
  log "Invoking PDF OCR"
  system("open", "-a", "Scan to Searchable PDF", $fn)
end
# Main flow: nothing to do when the PDF already yields text. Otherwise make
# sure the Creator field is acceptable, hand the file to the OCR application,
# and wait for the user to confirm that OCR has finished.
unless does_pdf_file_contain_text?
  log "Not yet searchable"

  if needs_creator_fixed?
    fix_creator
    # Re-check after the rewrite; give up if the Creator is still wrong.
    still_unfixed = needs_creator_fixed?
    raise "Failed to fix creator" if still_unfixed
  end

  make_searchable

  # No trailing newline: the prompt stays on the line where the user hits enter.
  $stdout << "Hit enter when OCR is finished..."
  $stdin.gets
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment