Skip to content

Instantly share code, notes, and snippets.

@retrography
Created September 20, 2017 11:07
Show Gist options
  • Save retrography/f5cb4e84d53150ea896cff73a9782e35 to your computer and use it in GitHub Desktop.
Save retrography/f5cb4e84d53150ea896cff73a9782e35 to your computer and use it in GitHub Desktop.
Transforms jbig2-encoded images into PDF - A Ruby port of https://github.com/agl/jbig2enc/blob/master/pdf.py
#!/usr/bin/env ruby
# Copyright 2017 Mahmood S. Zargar
# Author: mahmood@gmail.com (Mahmood S. Zargar)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a Ruby port of "pdf.py" published under the same license
# Copyright 2006 Google Inc.
# Author: agl@imperialviolet.org (Adam Langley)
# JBIG2 Encoder
# https://github.com/agl/jbig2enc
# Default resolution, presumably in dots per inch (going by the name).
# NOTE(review): this is a top-level local variable, and Ruby `def` bodies do
# not see top-level locals, so methods defined in this file cannot read it by
# name.
dpi = 72
# Small model of a PDF dictionary: an insertion-ordered name => value map
# that knows how to print itself in "<< /Name value ... >>" form.
class Dict
  # The backing Hash. Callers read and write entries through it directly.
  attr_accessor :d

  # values - Hash of initial entries. They are copied into a fresh Hash, so
  #          later changes to this Dict do not touch the Hash passed in.
  def initialize(values = {})
    @d = {}.merge(values)
  end

  # Serialises the dictionary: "<< ", then one "/key value\n" line per entry
  # in insertion order, then ">>\n". Values are rendered with #to_s.
  def to_s
    entry_parts = @d.flat_map do |name, value|
      ['/%s ' % name, value.to_s, "\n"]
    end
    ['<< ', *entry_parts, ">>\n"].join
  end
end
# Process-wide counter that hands out object numbers; every Obj.new takes the
# current value as its id and then increments it.
$global_next_id = 1

# One PDF object body: a dictionary plus an optional stream payload.
# The "N 0 obj" header line is not produced here; only the dictionary,
# the stream section (when present) and the closing "endobj" are.
class Obj
  # Object number taken from $global_next_id at construction time.
  attr_accessor :id
  # The Dict holding this object's dictionary entries.
  attr_accessor :d

  # d      - Hash of dictionary entries. It is copied, never modified.
  # stream - String with the stream payload, or nil for a stream-less object.
  #
  # When a stream is given, a 'Length' entry is added to the object's own
  # dictionary. The length is measured in bytes (not characters), because
  # the value has to match the number of bytes written between "stream" and
  # "endstream" even if the payload is not tagged as binary.
  def initialize(d = {}, stream = nil)
    @d = Dict.new(d)
    @d.d['Length'] = stream.bytesize.to_s unless stream.nil?
    @stream = stream
    @id = $global_next_id
    $global_next_id += 1
  end

  # Returns the serialised object body: dictionary, optional
  # "stream ... endstream" section, then "endobj\n".
  def to_s
    parts = [@d.to_s]
    parts.push("stream\n", @stream, "\nendstream\n") unless @stream.nil?
    parts << "endobj\n"
    parts.join
  end
end
# Collects objects and serialises them as a PDF 1.4 file: header, object
# bodies, cross-reference table and trailer.
#
# Objects only need to respond to #id and #to_s. The xref table lists offsets
# in insertion order starting at object number 1, and the trailer points at
# "1 0 R" as the root, so objects are expected to be added in id order with
# the catalog first.
class Doc
  def initialize
    @objs = []
    @pages = []
  end

  # Appends +o+ to the document and returns it.
  def add_object(o)
    @objs << o
    o
  end

  # Records +o+ as a page and also appends it as a regular object.
  # Returns +o+.
  def add_page(o)
    @pages << o
    add_object(o)
  end

  # Returns the complete document as a String (no trailing newline after
  # "%%EOF").
  def to_s
    lines = []
    offsets = []
    position = 0

    # Appends a line and advances the running offset. Offsets in the xref
    # table are byte positions, so the length is taken with bytesize; the
    # extra 1 is the "\n" that the final join inserts after the line.
    emit = lambda do |text|
      lines << text
      position += text.bytesize + 1
    end

    emit.call('%PDF-1.4')
    @objs.each do |obj|
      offsets << position
      emit.call('%i 0 obj' % obj.id)
      emit.call(obj.to_s)
    end

    xrefstart = position
    lines << 'xref'
    lines << '0 %i' % (offsets.size + 1)
    # Entry for object 0, the head of the free list.
    lines << '0000000000 65535 f '
    offsets.each do |offset|
      lines << '%010i 00000 n ' % offset
    end
    lines << ''
    lines << 'trailer'
    lines << "<< /Size %i\n/Root 1 0 R >>" % (offsets.size + 1)
    lines << 'startxref'
    lines << xrefstart.to_s
    lines << '%%EOF'
    lines.join("\n")
  end
end
# Formats an indirect reference ("<number> 0 R") to the object numbered +x+.
def ref(x)
  reference_template = '%i 0 R'
  reference_template % x
end
# Builds a PDF that wraps JBIG2 page files and returns it as a String.
#
# symboltable - path of the shared symbol table file; its bytes become the
#               stream every page image references as /JBIG2Globals.
# pagefiles   - Array of page file paths. They are processed in sorted order;
#               the array itself is not modified.
#
# For each page, bytes 11..26 of the file are read as four big-endian 32-bit
# unsigned integers: width, height, x resolution and y resolution. A
# resolution of 0 is replaced by 72. The page size is width * 72 / xres by
# height * 72 / yres.
#
# Page files that cannot be read, or that are too short to hold that header,
# are reported on stderr and skipped. Errors reading the symbol table are not
# rescued.
def jbig2pdf(symboltable='output.sym', pagefiles=Dir['output.[0-9]*'])
  # Resolution assumed when a page header reports 0. Kept local because
  # top-level local variables are not visible inside a method body.
  fallback_dpi = 72

  # The catalog, outlines and page tree are referenced below by the fixed
  # numbers 1, 2 and 3, and Doc#to_s numbers its xref table from 1. Restart
  # the counter so that holds on every call, not only the first one.
  $global_next_id = 1

  doc = Doc.new
  doc.add_object(Obj.new({'Type' => '/Catalog', 'Outlines' => ref(2), 'Pages' => ref(3)}))
  doc.add_object(Obj.new({'Type' => '/Outlines', 'Count' => '0'}))
  pages = doc.add_object(Obj.new({'Type' => '/Pages'}))
  symd = doc.add_object(Obj.new({}, File.binread(symboltable)))
  page_objs = []

  pagefiles.sort.each do |path|
    begin
      contents = File.binread(path)
    rescue SystemCallError, IOError
      # Missing or unreadable files raise Errno::* (SystemCallError).
      $stderr.puts("error reading page file %s\n"% path)
      next
    end

    header = contents.byteslice(11, 16)
    if header.nil? || header.bytesize < 16
      $stderr.puts("page file %s is too short to contain a page header\n"% path)
      next
    end

    width, height, xres, yres = header.unpack('NNNN')
    xres = fallback_dpi if xres == 0
    yres = fallback_dpi if yres == 0
    page_width = (width.to_f * 72) / xres
    page_height = (height.to_f * 72) / yres

    # Objects are created in the same order they are added to the document,
    # so their ids match their position in the file.
    xobj = Obj.new({'Type' => '/XObject', 'Subtype' => '/Image',
                    'Width' => width.to_s, 'Height' => height.to_s,
                    'ColorSpace' => '/DeviceGray', 'BitsPerComponent' => '1',
                    'Filter' => '/JBIG2Decode',
                    'DecodeParms' => ' << /JBIG2Globals %i 0 R >>' % symd.id}, contents)
    drawing = Obj.new({}, 'q %f 0 0 %f 0 0 cm /Im1 Do Q' % [page_width, page_height])
    resources = Obj.new({'ProcSet' => '[/PDF /ImageB]',
                         'XObject' => '<< /Im1 %i 0 R >>' % xobj.id})
    page = Obj.new({'Type' => '/Page', 'Parent' => '3 0 R',
                    'MediaBox' => '[ 0 0 %f %f ]' % [page_width, page_height],
                    'Contents' => ref(drawing.id),
                    'Resources' => ref(resources.id)})
    [xobj, drawing, resources, page].each { |obj| doc.add_object(obj) }
    page_objs << page
  end

  # Written once, after the loop, so the page tree always carries both
  # entries even when no page file was usable.
  pages.d.d['Count'] = page_objs.size.to_s
  pages.d.d['Kids'] = '[' + page_objs.map { |obj| ref(obj.id) }.join(" ") + "]"
  doc.to_s
end
# Prints "pdf.rb: <msg>" followed by a usage line to stderr, then terminates
# the process with a failure exit status. Never returns.
def usage(msg)
  program = "pdf.rb"
  $stderr.puts(format("%s: %s\n", program, msg))
  $stderr.puts(format("Usage: %s [file_basename] > out.pdf\n", program))
  exit(false)
end
########## Main ##########
# Command-line entry point. With one argument, that argument is the basename
# of the input files; with none, the basename is "output". The symbol table
# is "<basename>.sym" and the pages are every file matching
# "<basename>.[0-9]*". The resulting PDF is written to stdout.
if __FILE__ == $0
  usage("Wrong number of arguments!") if ARGV.size > 1

  basename = ARGV.empty? ? 'output' : ARGV[0]
  sym = basename + '.sym'
  pages = Dir[basename + '.[0-9]*']

  # File.exist? is used because the File.exists? alias was removed in
  # Ruby 3.2.
  if !File.exist?(sym)
    usage("Symbol table %s not found!"% sym)
  elsif pages.empty?
    usage("No pages found!")
  end

  # The PDF contains raw image bytes; keep stdout from translating them.
  $stdout.binmode
  print(jbig2pdf(sym, pages))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment