Skip to content

Instantly share code, notes, and snippets.

@addam
Created June 7, 2016 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save addam/1ee1085f2d450dd737f9ca63bb83a2fd to your computer and use it in GitHub Desktop.
Save addam/1ee1085f2d450dd737f9ca63bb83a2fd to your computer and use it in GitHub Desktop.
Minimalistic script that creates a PDF file.
def format_dict(obj, refs=tuple()):
    """Serialize a mapping as a PDF dictionary: ``<< /Key value ... >>``.

    Each entry is emitted as ``/key value`` followed by a newline; the value
    text is produced by ``format_value``, which receives ``refs`` unchanged so
    that members of ``refs`` are written as indirect references.
    """
    entries = []
    for name, item in obj.items():
        entries.append("/{} {}\n".format(name, format_value(item, refs)))
    return "<< " + "".join(entries) + ">>"
def format_value(value, refs=tuple()):
    """Return the PDF source text for a single value.

    Args:
        value: The object to serialize.
        refs: Sequence of objects that are written as top-level (indirect)
            PDF objects. A value that compares equal to a member of ``refs``
            is emitted as ``"<n> 0 R"``, where ``n`` is its 1-based position.

    Returns:
        A string: an indirect reference, a dictionary, an array, a number,
        ``true``/``false``, or -- for anything else -- a PDF name (``/value``).
    """
    if value in refs:
        # Object numbers are 1-based; number 0 is reserved by the xref table.
        return "{} 0 R".format(refs.index(value) + 1)
    if isinstance(value, dict):
        return format_dict(value, refs)
    if isinstance(value, (list, tuple)):
        return "[ " + " ".join(format_value(item, refs) for item in value) + " ]"
    # bool is a subclass of int, so it has to be tested before the numbers.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    # This script can output only PDF names, no strings.
    return "/{}".format(value)
def write_object(index, obj, refs, f, stream=None):
    """Write one indirect PDF object to ``f`` and return the amount written.

    Args:
        index: Object number used in the ``"<index> 0 obj"`` header.
        obj: Either a dict (the object's dictionary, optionally carrying its
            stream data under the ``"stream"`` key) or any non-dict value,
            which is treated as the stream data of an otherwise empty
            dictionary.
        refs: Sequence of all top-level objects, forwarded to ``format_dict``
            so that nested members are written as indirect references.
        f: Writable file-like object whose ``write`` returns a count.
        stream: Stream data used when ``obj`` is a dict without a
            ``"stream"`` key.

    Returns:
        The sum of the values returned by ``f.write``. This equals the number
        of bytes only if every character is stored as exactly one byte.

    The caller's ``obj`` is left untouched: the ``"stream"`` entry is removed
    from, and ``"Length"`` is added to, a shallow copy only.
    """
    if isinstance(obj, dict):
        header = dict(obj)  # shallow copy, so the caller's dict is not mutated
        if "stream" in header:
            stream = header.pop("stream")
    else:
        stream, header = obj, dict()
    has_stream = stream is not None  # an empty stream is still a stream
    if has_stream:
        header["Length"] = len(stream)
    byte_count = f.write("{} 0 obj\n".format(index))
    byte_count += f.write(format_dict(header, refs))
    if has_stream:
        byte_count += f.write("\nstream\n")
        byte_count += f.write(stream)
        byte_count += f.write("\nendstream")
    return byte_count + f.write("\nendobj\n")
def write(*data, file):
    """Write ``data`` as a complete PDF 1.4 document to ``file``.

    Args:
        *data: The top-level objects, in object-number order (the first one
            becomes object 1). Exactly the objects listed here are written as
            indirect objects; one of them must be a dict whose ``"Type"`` is
            ``"Catalog"``.
        file: Writable file-like object (keyword-only). The values returned
            by its ``write`` method are summed to obtain the offsets stored
            in the cross-reference table, so each character must be stored
            as exactly one byte for the offsets to be valid.

    Raises:
        ValueError: If ``data`` contains no catalog dictionary.
    """
    # Locate the document root before emitting anything, skipping non-dict
    # entries (raw content streams) that cannot be indexed by "Type".
    catalog = next(
        (ob for ob in data if isinstance(ob, dict) and ob.get("Type") == "Catalog"),
        None)
    if catalog is None:
        raise ValueError("no object of Type 'Catalog' among the data to write")

    offsets = list()
    position = file.write("%PDF-1.4\n")
    for index, obj in enumerate(data, 1):
        offsets.append(position)
        position += write_object(index, obj, data, file)
    xref_pos = position

    # The table has one entry per object plus the mandatory free entry 0.
    entry_count = len(offsets) + 1
    file.write("xref\n0 {}\n".format(entry_count))
    # Every entry is exactly 20 bytes: a one-byte end-of-line is preceded by
    # a space. The head of the free list carries generation number 65535.
    file.write("{:010} {:05} f \n".format(0, 65535))
    for offset in offsets:
        file.write("{:010} {:05} n \n".format(offset, 0))
    file.write("trailer\n")
    file.write(format_dict({"Size": entry_count, "Root": catalog}, data))
    file.write("\nstartxref\n{}\n%%EOF\n".format(xref_pos))
def encode(data):
    """Deflate ``data`` and return it as ASCII85 text.

    Anything that has an ``encode`` method (such as ``str``) is converted to
    bytes first. The compressed bytes are Ascii85-encoded in Adobe framing,
    wrapped at 250 columns; the first two characters of that output (the
    ``<~`` opener) are dropped, while the closing ``~>`` is kept.
    """
    import base64
    import zlib
    raw = data.encode() if hasattr(data, "encode") else data
    framed = base64.a85encode(zlib.compress(raw), adobe=True, wrapcol=250)
    return framed[2:].decode()
# Demo document: three pages (text, line drawing, image) sharing one font.
root = {"Type": "Pages", "MediaBox": [0, 0, 595, 842], "Kids": list()}
catalog = {"Type": "Catalog", "Pages": root}
font = {"Type": "Font", "Subtype": "Type1", "Name": "Font1", "BaseFont": "Helvetica", "Encoding": "MacRomanEncoding"}
# All graphic commands use prefix notation and have a fixed number of parameters
content_streams = [
    "BT /F1 10 Tf 1 1 Td (Hello world) Tj ET",  # BT..ET: begin..end text; /F1 references the font resource
    "15 25 m 15 35 l S 4 w [ 4 6 ] 0 d 15 25 m 40 25 l S [ ] 0 d 1 w",  # m, l, S: move, line, stroke; w: width; d: dasharray
    "100 0 0 100 0 0 cm /Im1 Do"]  # cm: matrix in column-major notation; Do: draw object, images are always 1x1
image = {"Type": "XObject", "Subtype": "Image", "Width": 2, "Height": 3, "ColorSpace": "DeviceGray", "BitsPerComponent": 8, "Interpolate": False, "Filter": ["ASCII85Decode", "FlateDecode"], "stream": encode(b"\x22\x44\x00\x88\xff\xcc")}
for content in content_streams:
    # Im1 is listed as a resource only on the page whose content draws it ("Do")
    xobjects = {"Im1": image} if "Do" in content else dict()
    resources = {"Font": {"F1": font}, "XObject": xobjects}
    page = {"Type": "Page", "Parent": root, "Contents": content, "Resources": resources}
    root["Kids"].append(page)
root["Count"] = len(root["Kids"])
# The xref offsets are derived from the counts returned by write(), so the
# file must store exactly one byte per character: newline translation is
# disabled and a single-byte encoding is used.
with open("out.pdf", "w", newline="\n", encoding="latin-1") as f:
    write(catalog, root, font, *root["Kids"], *content_streams, image, file=f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment