Skip to content

Instantly share code, notes, and snippets.

Created May 25, 2014 21:15
Show Gist options
  • Save anonymous/22f4cb90d0d5e425d377 to your computer and use it in GitHub Desktop.
Save anonymous/22f4cb90d0d5e425d377 to your computer and use it in GitHub Desktop.
Peter's pure Python PicoPDF: Create a PDF in 100 lines of Python
from datetime import datetime
from zlib import compress
def serialize(x):
    """Render a Python value in PDF object syntax and return it as text.

    dict        -> ``<<key value ...>>`` with one entry per line; keys and
                   values are serialized recursively.
    list/tuple  -> ``[item item ...]``.
    bool        -> ``true`` / ``false``.
    None        -> ``null``.
    float       -> decimal notation, never exponent notation.
    other       -> ``str(x)``; strings such as ``'/Name'`` or ``'3 0 R'`` are
                   therefore emitted verbatim.
    """
    # bool is tested first: it is a subclass of int, and str(True) would give
    # 'True', which is not a PDF keyword.
    if isinstance(x, bool):
        return 'true' if x else 'false'
    if x is None:
        return 'null'
    if isinstance(x, dict):
        return '<<' + '\n'.join(serialize(k) + ' ' + serialize(v) for k, v in x.items()) + '>>'
    if isinstance(x, (list, tuple)):
        return '[' + ' '.join(serialize(it) for it in x) + ']'
    if isinstance(x, float):
        text = str(x)
        if 'e' in text or 'E' in text:
            # PDF real numbers have no exponent form, so fall back to
            # fixed-point and trim the padding zeros.
            text = '{0:.10f}'.format(x).rstrip('0').rstrip('.')
        return text
    return str(x)
def reference(objectNumber, generation=0):
    """Return the indirect-reference token for an object, e.g. ``3 0 R``.

    objectNumber -- number of the referenced indirect object.
    generation   -- generation number; defaults to 0.
    """
    return '%s %s R' % (objectNumber, generation)
def stream(streamDict, streamBytes):
    """Return the text of a PDF stream object body.

    The result is the serialized stream dictionary, the ``stream`` keyword,
    the payload and the ``endstream`` keyword, joined by newlines.

    streamDict  -- stream dictionary; passed through serialize().
    streamBytes -- stream payload, either text or a bytes-like object.

    A bytes-like payload that is not already ``str`` (for example the result
    of zlib.compress() on Python 3) is mapped to text with latin-1, which
    turns every byte into exactly one character. The caller has to encode
    the final document with latin-1 again to get the original bytes back.
    """
    if isinstance(streamBytes, (bytes, bytearray)) and not isinstance(streamBytes, str):
        # Joining bytes with str raises TypeError on Python 3.
        streamBytes = streamBytes.decode('latin-1')
    return '\n'.join([serialize(streamDict), 'stream', streamBytes, 'endstream'])
class PetersPurePythonPicoPDF(object):
    """Tiny text-only PDF writer.

    Text is collected per page as content-stream fragments by addText() and
    written out as a PDF 1.3 file by save().

    Attributes set by __init__:
    compress    -- when true, save() Flate-compresses each page content stream.
    pageWidth   -- MediaBox width in points (default is A4).
    pageHeight  -- MediaBox height in points (default is A4).
    pages       -- list of pages; each page is a list of content fragments.
                   addText() always appends to the last page.
    fonts       -- maps a font resource name ('F1', 'F2', ...) to its
                   description dict (see addFont()).
    """

    def __init__(self):
        """Create a document with one empty A4 page and 14 registered fonts."""
        self.compress = True
        self.pageWidth = 595.28  # A4
        self.pageHeight = 841.89
        self.pages = [[]]
        self.fonts = {}
        # Registration order fixes the resource names: 'F1' is Times-Roman,
        # 'F2' is Helvetica, and so on.
        for fontName in ["Times-Roman", "Helvetica", "Courier", "Symbol",
                         "Times-Bold", "Helvetica-Bold", "Courier-Bold", "ZapfDingbats",
                         "Times-Italic", "Helvetica-Oblique", "Courier-Oblique",
                         "Times-BoldItalic", "Helvetica-BoldOblique", "Courier-BoldOblique"]:
            self.addFont(fontName, "StandardEncoding")

    def addFont(self, baseFont, encoding):
        """Register a font and return its description dict.

        The font gets the next free resource name 'F<n>'. The returned dict
        has the keys 'id', 'baseFont' and 'encoding'; save() later adds
        'objectNumber' to it.
        """
        fontId = 'F{0}'.format(len(self.fonts) + 1)
        self.fonts[fontId] = {'id': fontId, 'baseFont': baseFont, 'encoding': encoding}
        return self.fonts[fontId]

    @staticmethod
    def _escapeText(text):
        """Escape backslash and parentheses for use inside a PDF literal string."""
        # The backslash must be handled first so the escapes added for the
        # parentheses are not escaped a second time.
        return text.replace('\\', '\\\\').replace('(', '\\(').replace(')', '\\)')

    def addText(self, lines, x, y, font='F1', size=16, leading=1, color='0 g'):
        """Append a block of text lines to the last page.

        lines   -- iterable of lines; each is treated as literal text, so
                   parentheses and backslashes are escaped automatically.
        x       -- horizontal position of the first line, in points.
        y       -- vertical position of the first line, in points, measured
                   downwards from the top edge (it is subtracted from
                   pageHeight).
        font    -- font resource name as registered by addFont().
        size    -- font size in points.
        leading -- line spacing as a multiple of size.
        color   -- content-stream operator text inserted verbatim before the
                   text is positioned (default '0 g').
        """
        # The first line is shown at the Td position; every following line is
        # preceded by T*, which moves down by the TL leading set below.
        lines = 'T* '.join(
            '({line}) Tj\n'.format(line=self._escapeText('{0}'.format(line)))
            for line in lines)
        self.pages[-1].append('BT\n/{font} {size} Tf\n{leading} TL\n{color}\n'
                              '{x} {y} Td\n{lines}'
                              'ET'.format(font=font, size=size, leading=size * leading, color=color,
                                          x=x, y=self.pageHeight - y, lines=lines))

    def save(self, filename):
        """Write the document to filename as a PDF 1.3 file.

        Objects are written in this order: for every page its page object and
        its content stream, then the page tree root, one object per font, the
        shared resource dictionary and the catalog, followed by the
        cross-reference table and the trailer.

        The document is assembled as text in which every character stands for
        one byte and is encoded with latin-1 when written. On Python 3, page
        text containing characters outside latin-1 therefore raises
        UnicodeEncodeError; this happens before the file is opened.
        """
        # indirectObjectOffsets[i] is the byte offset of object i + 1 and the
        # last entry is the running write position. A list is used so that
        # the nested helpers can update the position in place.
        indirectObjectOffsets = [0]
        output = []

        def out(*args):
            v = '\n'.join(serialize(x) for x in args)
            # + 1 accounts for the newline that joins the chunks on write.
            indirectObjectOffsets[-1] += len(v) + 1
            output.append(v)

        def lastObjectNumber():
            return len(indirectObjectOffsets) - 1

        def indirectObject(v):
            # Freeze the current position as the new object's offset. The
            # result must be passed to out() right away so the running
            # position advances past the object.
            indirectObjectOffsets.append(indirectObjectOffsets[-1])
            return '{0} 0 obj\n{1}\nendobj'.format(lastObjectNumber(), serialize(v))

        out('%PDF-1.3')  # Header

        # Object numbers that are referenced before the objects are written.
        pagesRootNumber = lastObjectNumber() + 2 * len(self.pages) + 1
        resourcesNumber = pagesRootNumber + len(self.fonts) + 1

        pageNumbers = []
        for pageContent in self.pages:
            contentNumber = lastObjectNumber() + 2
            out(indirectObject({'/Type': '/Page',
                                '/Parent': reference(pagesRootNumber),
                                '/Resources': reference(resourcesNumber),
                                '/Contents': reference(contentNumber)}))
            pageNumbers.append(lastObjectNumber())

            pageBytes = '\n'.join(pageContent)
            if not isinstance(pageBytes, bytes):
                # zlib.compress() needs bytes on Python 3.
                pageBytes = pageBytes.encode('latin-1')
            pageStreamDict = {}
            if self.compress:
                pageStreamDict['/Filter'] = ['/FlateDecode']
                pageBytes = compress(pageBytes)
            pageStreamDict['/Length'] = len(pageBytes)
            if not isinstance(pageBytes, str):
                # Back to text, one character per byte, so the payload can be
                # joined with the rest of the document and len() still
                # counts bytes.
                pageBytes = pageBytes.decode('latin-1')
            out(indirectObject(stream(pageStreamDict, pageBytes)))

        out(indirectObject({'/Type': '/Pages',
                            '/Kids': [reference(number) for number in pageNumbers],
                            '/Count': len(self.pages),
                            '/MediaBox': [0, 0, self.pageWidth, self.pageHeight]}))

        for font in self.fonts.values():
            out(indirectObject({'/Type': '/Font',
                                '/Subtype': '/Type1',
                                '/BaseFont': '/' + font['baseFont'],
                                '/Encoding': '/' + font['encoding']}))
            font['objectNumber'] = lastObjectNumber()

        out(indirectObject({'/ProcSet': ['/PDF', '/Text', '/ImageB', '/ImageC', '/ImageI'],
                            '/Font': {'/' + key: reference(font['objectNumber'])
                                      for key, font in self.fonts.items()}}))

        out(indirectObject({'/Type': '/Catalog',
                            '/Pages': reference(pagesRootNumber),
                            '/PageLayout': '/OneColumn'}))
        catalogNumber = lastObjectNumber()
        totalObjects = catalogNumber + 1  # + 1 for the free object 0

        cross_ref_offset = indirectObjectOffsets[-1]
        out('xref')
        out('0 ' + str(totalObjects))
        out('0000000000 65535 f ')  # First object 0 shall be free
        # The slice is a copy, so the position updates made by out() inside
        # the loop do not disturb the iteration. The trailing space makes
        # every entry exactly 20 bytes long including the newline.
        for offset in indirectObjectOffsets[:-1]:
            out('{0:010} 00000 n '.format(offset))
        out('trailer', {'/Size': totalObjects,
                        '/Root': reference(catalogNumber)},
            'startxref', cross_ref_offset, '%%EOF')

        document = '\n'.join(output)
        if not isinstance(document, bytes):
            document = document.encode('latin-1')
        with open(filename, 'wb') as f:
            f.write(document)
# Demo: write a one-page document with two lines of text to pppp.pdf.
p = PetersPurePythonPicoPDF()
p.addText(
    [
        "Please Peter Parker",
        "Princess Python possesses proding powers",
    ],
    x=123,
    y=456,
)
p.save(filename="pppp.pdf")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment