Last active
March 5, 2017 11:52
-
-
Save snipsnipsnip/61ffb688ea60051537f1fd4431e1da50 to your computer and use it in GitHub Desktop.
pdf2png: runs 'mutool extract' and renumbers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
# -*- coding: utf-8 -*-
# Extracts the images of a PDF with `mutool extract` and renames the results
# so they are numbered by their position in the `mutool info` image listing
# ("0001.png", "0002.png", ...). All files are written to the current directory.
class PDFExtract
  # One entry of the `mutool info` image listing.
  #
  # page - 1-based position of the entry within the listing.
  # id   - the number mutool prints in parentheses after the colourspace;
  #        it is handed to `mutool extract` to select the image.
  Image = Struct.new(:page, :id) do
    # Human-readable "source -> destination" description used in log output.
    def to_s
      "#{extracted_name} -> #{renumbered_name}"
    end

    # File name this script expects `mutool extract` to produce for the image.
    def extracted_name
      format('img-%04d.png', id)
    end

    # Final file name, numbered by listing position.
    def renumbered_name
      format('%04d.png', page)
    end
  end

  # Runs the extraction for every path given on the command line.
  def self.main
    ARGV.each { |pdf| new.extract(pdf) }
  end

  # Extracts and renumbers every image listed for +pdf+.
  #
  # An image whose renumbered file already exists and is non-empty is skipped.
  # If a non-empty extracted file is already present, mutool is not run again
  # for that image and the file is only renamed.
  #
  # @param pdf [String] path to the PDF file
  # @return [Array<Image>] the images found in the listing
  # @raise [RuntimeError] if the listing cannot be parsed or `mutool extract` fails
  def extract(pdf)
    parse_info(mutool_info(pdf)).each do |img|
      if nonempty?(img.renumbered_name)
        puts "skipping #{img}"
        next
      end

      extract_image(pdf, img) unless nonempty?(img.extracted_name)
      puts "renaming #{img}"
      File.rename(img.extracted_name, img.renumbered_name)
    end
  end

  private

  # Returns the output of `mutool info` for +pdf+.
  #
  # The command is given as an argument array so no shell is involved: paths
  # containing spaces or shell metacharacters are passed through verbatim.
  def mutool_info(pdf)
    IO.popen(['mutool', 'info', pdf], &:read)
  end

  # Runs `mutool extract -r` for a single image and raises if it does not
  # exit successfully. Uses the multi-argument form of `system` (no shell).
  def extract_image(pdf, img)
    return if system('mutool', 'extract', '-r', pdf, img.id.to_s)

    # The quoted command string is only for the error message.
    raise %(mutool extract -r "#{pdf}" "#{img.id}" failed: #{$?})
  end

  # True when +name+ exists and has a size greater than zero
  # (File.size? returns nil for both missing and empty files).
  def nonempty?(name)
    !File.size?(name).nil?
  end

  # Parses the text printed by `mutool info` into Image entries.
  #
  # Finds the "Images (N" header, then collects the number following each
  # colourspace name in the text after it. The number of entries found must
  # equal N.
  #
  # @param str [String] output of `mutool info`
  # @return [Array<Image>]
  # @raise [RuntimeError] if the header is missing or the counts disagree
  def parse_info(str)
    header = /^Images \((?<pages>\d+)/.match(str)
    raise 'parse error' unless header

    ids = header.post_match.scan(/(?:DevGray|DevRGB|ICC|RGB|CMYK) \((\d+)/).flatten
    # Explicit base 10 so a zero-padded number is never read as octal.
    images = ids.each_with_index.map { |id, index| Image.new(index + 1, Integer(id, 10)) }

    pages = header[:pages]
    unless images.size == Integer(pages, 10)
      raise "unexpected parse result: #{images.size} RGBs vs #{pages} images"
    end

    images
  end
end
# Run the extraction only when this file is executed directly, not when it is loaded as a library.
PDFExtract.main if $PROGRAM_NAME == __FILE__
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment