Skip to content

Instantly share code, notes, and snippets.

@kmlyvens
Created April 2, 2016 11:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmlyvens/b532c7aec2fe2bd8214ae2b3faf8f741 to your computer and use it in GitHub Desktop.
Save kmlyvens/b532c7aec2fe2bd8214ae2b3faf8f741 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# Copyright 2006 Google Inc.
# Author: agl@imperialviolet.org (Adam Langley)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# JBIG2 Encoder
# https://github.com/agl/jbig2enc
import sys
import re
import struct
import glob
import os
# This is a very simple script to make a PDF file out of the output of a
# multipage symbol compression.
# Run ./jbig2 -s -p <other options> image1.jpeg image2.jpeg ...
# python pdf.py output > out.pdf
# Fallback resolution: main() substitutes this value when a page file
# reports 0 for its x or y resolution.
dpi = 72
class Ref:
    """Indirect reference to a PDF object, rendered as ``<number> 0 R``."""

    def __init__(self, x):
        # x is the object number; it is formatted with %d when rendered.
        self.x = x

    def __str__(self):
        number = self.x
        return "%d 0 R" % number
class Dict:
    """A PDF dictionary: name/value pairs rendered between ``<<`` and ``>>``.

    The entries live in the plain dict ``self.d``; callers may add or change
    keys there after construction.
    """

    def __init__(self, values=None):
        """Create the dictionary from an optional mapping of initial entries.

        ``values`` is copied, so later changes to ``self.d`` never touch the
        mapping that was passed in. ``None`` (the default) means "no entries";
        a ``None`` sentinel is used instead of a shared mutable ``{}`` default.
        """
        self.d = {}
        if values is not None:
            self.d.update(values)

    def __str__(self):
        """Return the PDF text form: one ``/key value`` line per entry.

        Values are converted with ``str()``; the result ends with ``>>`` and
        a newline.
        """
        parts = ['<< ']
        for key, value in self.d.items():
            parts.append('/%s ' % key + str(value) + '\n')
        parts.append('>>\n')
        return ''.join(parts)
# Next object number to hand out; every Obj created in this process takes
# the current value and increments it.
global_next_id = 1


class Obj:
    """A numbered PDF object: a dictionary plus an optional stream.

    Attributes:
        d: Dict holding the object's dictionary entries.
        stream: the stream payload, or None when the object has no stream.
        id: object number, assigned from the module-level counter at
            construction time.
    """

    # Not read by this class; numbering uses the module-level
    # global_next_id counter instead.
    next_id = 1

    def __init__(self, d=None, stream=None):
        """Create the object and assign it the next free object number.

        Args:
            d: mapping of dictionary entries (optional). It is copied, so the
               caller's mapping is never modified.
            stream: optional stream payload. When given, a ``Length`` entry
               holding ``str(len(stream))`` is added to the dictionary.
        """
        global global_next_id
        # Work on a private copy: writing 'Length' into the argument itself
        # would leak the entry into the caller's dict (and, with a mutable
        # default, into every later Obj created without an explicit dict).
        entries = {} if d is None else dict(d)
        if stream is not None:
            entries['Length'] = str(len(stream))
        self.d = Dict(entries)
        self.stream = stream
        self.id = global_next_id
        global_next_id += 1

    def __str__(self):
        """Return the object body: dictionary, optional stream, ``endobj``.

        The ``N 0 obj`` header line is not produced here. Because the pieces
        are joined into one str, ``stream`` must itself be a str.
        """
        pieces = [str(self.d)]
        if self.stream is not None:
            pieces.extend(['stream\n', self.stream, '\nendstream\n'])
        pieces.append('endobj\n')
        return ''.join(pieces)
class Doc:
    """Collects objects and serialises them as a PDF 1.4 file.

    Attributes:
        objs: every object added, in insertion order.
        pages: the objects that were added through add_page().
    """

    def __init__(self):
        self.objs = []
        self.pages = []

    def add_object(self, o):
        """Append ``o`` to the document and return it."""
        self.objs.append(o)
        return o

    def add_page(self, o):
        """Record ``o`` as a page, then add it like any other object."""
        self.pages.append(o)
        return self.add_object(o)

    def __str__(self):
        """Return the whole file: header, objects, xref table and trailer.

        Each object must expose ``id`` and render its body through ``str()``.
        Offsets are computed on the assumption that all pieces are joined by
        a single newline and that ``len()`` of a piece equals the number of
        bytes it occupies in the output.

        NOTE: xref entries are written in insertion order and the trailer
        hard-codes ``/Root 1 0 R``, so the table is only meaningful when
        objects are added in increasing ``id`` order starting from 1.
        """
        lines = ['%PDF-1.4']
        # Running offset of the next piece: every piece already emitted
        # costs its own length plus the newline that joins it to the next.
        position = len(lines[0]) + 1
        offsets = []
        for obj in self.objs:
            offsets.append(position)
            for piece in ('%d 0 obj' % obj.id, str(obj)):
                lines.append(piece)
                position += len(piece) + 1

        # The xref table starts right after the last object.
        entry_count = len(offsets) + 1
        lines.extend(['xref', '0 %d' % entry_count, '0000000000 65535 f '])
        lines.extend('%010d 00000 n ' % offset for offset in offsets)
        lines.extend([
            '',
            'trailer',
            '<< /Size %d\n/Root 1 0 R >>' % entry_count,
            'startxref',
            str(position),
            '%%EOF',
        ])
        return '\n'.join(lines)
def ref(x):
    """Return the PDF indirect-reference text ``<x> 0 R`` for object number x."""
    return ' '.join(('%d' % x, '0', 'R'))
def main(symboltable=None, pagefiles=None):
    """Build a PDF from JBIG2 page files and write it to stdout.

    Args:
        symboltable: path of a shared symbol-table file, or a false value
            when there is none. When given, its bytes become an extra object
            that each page image names as its /JBIG2Globals.
        pagefiles: iterable of page file paths. Defaults to everything
            matching ``page-*`` in the current directory, looked up when the
            function is called. Pages are emitted in sorted path order; the
            argument itself is not modified.

    Page files that cannot be read, or that are too short to contain the
    page dimensions, are reported on stderr and skipped.

    NOTE(review): Obj and Doc assemble the file as one str, so the binary
    data read here must be str as well. That holds on Python 2 and looks
    unlikely to work unchanged on Python 3 - confirm before porting.
    """
    if pagefiles is None:
        # Resolved per call rather than once at import time, so the default
        # reflects the directory contents at the moment main() runs.
        pagefiles = glob.glob('page-*')

    doc = Doc()
    # Object numbers 2 and 3 are hard-coded below (and '3 0 R' as each
    # page's parent): this relies on the catalog, outlines and pages objects
    # being the first three Obj instances created in the process.
    doc.add_object(Obj({'Type': '/Catalog', 'Outlines': ref(2), 'Pages': ref(3)}))
    doc.add_object(Obj({'Type': '/Outlines', 'Count': '0'}))
    pages = Obj({'Type': '/Pages'})
    doc.add_object(pages)

    symd = None
    if symboltable:
        with open(symboltable, 'rb') as symfile:
            symd = doc.add_object(Obj({}, symfile.read()))

    page_objs = []
    # sorted() leaves the caller's sequence untouched.
    for p in sorted(pagefiles):
        try:
            with open(p, 'rb') as pagefile:
                contents = pagefile.read()
        except IOError:
            sys.stderr.write("error reading page file %s\n" % p)
            continue

        # Width, height and the two resolutions are four big-endian 32-bit
        # integers at bytes 11..26 (presumably right after the JBIG2 segment
        # header - confirm against the format spec). A shorter file cannot
        # be unpacked, so skip it like an unreadable one.
        if len(contents) < 27:
            sys.stderr.write(
                "page file %s is too short to hold page dimensions\n" % p)
            continue
        (width, height, xres, yres) = struct.unpack('>IIII', contents[11:27])
        if xres == 0:
            xres = dpi
        if yres == 0:
            yres = dpi

        # Page size in PDF units: pixels scaled by 72 over the resolution.
        page_w = float(width * 72) / xres
        page_h = float(height * 72) / yres

        lexicon = {'Type': '/XObject', 'Subtype': '/Image',
                   'Width': str(width), 'Height': str(height),
                   'ColorSpace': '/DeviceGray', 'BitsPerComponent': '1',
                   'Filter': '/JBIG2Decode'}
        if symd is not None:
            lexicon['DecodeParms'] = ' << /JBIG2Globals %d 0 R >>' % symd.id
        xobj = Obj(lexicon, contents)
        # Content stream: scale the unit image to the page size and draw it.
        contents = Obj({}, 'q %f 0 0 %f 0 0 cm /Im1 Do Q' % (page_w, page_h))
        resources = Obj({'ProcSet': '[/PDF /ImageB]',
                         'XObject': '<< /Im1 %d 0 R >>' % xobj.id})
        page = Obj({'Type': '/Page', 'Parent': '3 0 R',
                    'MediaBox': '[ 0 0 %f %f ]' % (page_w, page_h),
                    'Contents': ref(contents.id),
                    'Resources': ref(resources.id)})
        # Added in creation order so file order matches object numbering.
        for o in (xobj, contents, resources, page):
            doc.add_object(o)
        page_objs.append(page)

    pages.d.d['Count'] = str(len(page_objs))
    pages.d.d['Kids'] = '[' + ' '.join([ref(x.id) for x in page_objs]) + ']'
    # Same output as the original print statement: the document plus one
    # trailing newline.
    sys.stdout.write(str(doc) + '\n')
def usage(script, msg):
    """Print an optional error and the usage line to stderr, then exit.

    Args:
        script: program name shown in both lines.
        msg: error text; when it is false, only the usage line is printed.

    Always terminates the process with exit status 1.
    """
    emit = sys.stderr.write
    if msg:
        emit("%s: %s\n" % (script, msg))
    emit("Usage: %s [file_basename] > out.pdf\n" % script)
    raise SystemExit(1)
if __name__ == '__main__':
    # Script entry point: every command-line argument is treated as a page
    # file, and no symbol table is passed.
    if sys.platform == "win32":
        # Put stdout into binary mode so the PDF is written without
        # text-mode newline translation.
        import msvcrt
        msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    main(False, sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment