# addam/pdf.py

## pdf.py
def format_dict(obj, refs=tuple()):
    """Serialize a mapping as a PDF dictionary literal.

    Each entry is rendered as ``/<key> <value>`` followed by a newline, where
    the value text comes from ``format_value``. The entries are wrapped in
    ``<< `` and ``>>``.

    Args:
        obj: Mapping of names to values.
        refs: Passed through unchanged to ``format_value`` for every value.

    Returns:
        The dictionary as a string.
    """
    entries = []
    for name, item in obj.items():
        entries.append("/{} {}\n".format(name, format_value(item, refs)))
    return "<< " + "".join(entries) + ">>"

def format_value(value, refs=tuple()):
    """Render a Python value in PDF object syntax.

    Args:
        value: The value to serialize.
        refs: Sequence of objects that are written as indirect objects. If
            ``value`` is found in it, an indirect reference ``N 0 R`` is
            returned, N being the 1-based position in ``refs``. The lookup
            uses ``in`` / ``index``, i.e. equality: an equal but distinct
            object resolves to the first match.

    Returns:
        str: ``dict`` -> dictionary (via ``format_dict``), ``list`` -> array,
        ``bool`` -> ``true``/``false``, ``int``/``float`` -> number, anything
        else -> a PDF name (``/value``). PDF strings are not supported.
    """
    if value in refs:
        return "{} 0 R".format(refs.index(value) + 1)
    kind = type(value)
    if kind is dict:
        return format_dict(value, refs)
    if kind is list:
        return "[ " + " ".join(format_value(item, refs) for item in value) + " ]"
    if kind is bool:
        return "true" if value else "false"
    if kind is int:
        return str(value)
    if kind is float:
        text = str(value)
        if "e" in text:
            # PDF reals have no exponent form, so very large/small floats
            # must be spelled out in fixed-point notation.
            text = "{:.10f}".format(value).rstrip("0").rstrip(".")
        return text
    return "/{}".format(value)  # this script can output only PDF names, no strings

def write_object(index, obj, refs, f, stream=None):
    """Write one indirect object (``N 0 obj`` ... ``endobj``) to ``f``.

    Args:
        index: Object number to write in the ``N 0 obj`` header.
        obj: Either a dict (the object's dictionary, optionally carrying its
            payload under the ``"stream"`` key) or any non-dict value, which
            is treated as the stream payload of an otherwise empty dictionary.
        refs: Passed to ``format_dict`` so that referenced objects are
            emitted as indirect references.
        f: Object with a ``write`` method that returns the amount written.
        stream: Payload used when ``obj`` is a dict without a ``"stream"`` key.

    Returns:
        The sum of the values returned by every ``f.write`` call made here.

    Note:
        ``Length`` is ``len(stream)``; for ``str`` payloads that is a character
        count, which matches the byte length only when each character is
        written as a single byte.
    """
    if type(obj) is not dict:
        stream, attributes = obj, dict()
    else:
        # Shallow copy: the caller's dict keeps its "stream" entry and does
        # not gain a "Length" key, so it can be written again later.
        attributes = dict(obj)
        if "stream" in attributes:
            stream = attributes.pop("stream")
    # An empty str/bytes payload is still a stream (e.g. a blank page's
    # content); only a missing payload means "plain dictionary object".
    has_stream = isinstance(stream, (str, bytes)) or bool(stream)
    if has_stream:
        attributes["Length"] = len(stream)

    written = f.write("{} 0 obj\n".format(index))
    written += f.write(format_dict(attributes, refs))
    if has_stream:
        written += f.write("\nstream\n")
        written += f.write(stream)
        written += f.write("\nendstream")
    return written + f.write("\nendobj\n")

def write(*data, file):
    """Write ``data`` as a complete PDF 1.4 document to ``file``.

    The objects are numbered 1..n in argument order and written with
    ``write_object``; then the cross-reference table, the trailer and the
    ``startxref`` footer follow.

    Args:
        *data: The indirect objects (dicts, or raw stream payloads). One of
            the dicts must have ``"Type": "Catalog"``; it becomes the
            trailer's ``Root``.
        file: Keyword-only. Object whose ``write`` returns the amount
            written. Offsets in the xref table are sums of these return
            values, so they are byte offsets only if one unit written is one
            byte in the resulting file.

    Raises:
        ValueError: If no catalog dict is among ``data``. Nothing has been
            written to ``file`` at that point.
    """
    # Look the catalog up first so a missing one does not leave a partial
    # file behind; non-dict entries (raw streams) are skipped.
    catalog = next(
        (ob for ob in data if isinstance(ob, dict) and ob.get("Type") == "Catalog"),
        None,
    )
    if catalog is None:
        raise ValueError("no object with Type 'Catalog' among the objects to write")

    position = file.write("%PDF-1.4\n")
    offsets = []
    for index, obj in enumerate(data, 1):
        offsets.append(position)
        position += write_object(index, obj, data, file)

    # Entry 0 is the head of the free list, hence one more entry than objects.
    entry_count = len(offsets) + 1
    tail = ["xref\n0 {}\n".format(entry_count)]
    # Each entry must be exactly 20 bytes: a single-character EOL is preceded
    # by a space. The free head entry carries generation number 65535.
    tail.append("{:010} {:05} f \n".format(0, 65535))
    tail.extend("{:010} {:05} n \n".format(offset, 0) for offset in offsets)
    tail.append("trailer\n")
    # Size is the number of xref entries (highest object number + 1).
    tail.append(format_dict({"Size": entry_count, "Root": catalog}, data))
    # `position` now is the offset at which the xref section starts.
    tail.append("\nstartxref\n{}\n%%EOF\n".format(position))
    file.write("".join(tail))

def encode(data):
    """Compress ``data`` with zlib and return it as ASCII85 text.

    Objects that have an ``encode`` method (such as ``str``) are converted
    with ``data.encode()`` first. The result is produced with Adobe framing
    and wrapped at 250 columns; its first two characters (the opening
    ``<~`` of the Adobe framing) are cut off, the closing marker is kept.

    Returns:
        str: The encoded text.
    """
    import base64
    import zlib

    raw = data.encode() if hasattr(data, "encode") else data
    framed = base64.a85encode(zlib.compress(raw), adobe=True, wrapcol=250)
    return framed[2:].decode()

# Document structure: catalog -> page tree root -> one page per content stream.
root = {"Type": "Pages", "MediaBox": [0, 0, 595, 842], "Kids": list()}
catalog = {"Type": "Catalog", "Pages": root}
font = {"Type": "Font", "Subtype": "Type1", "Name": "Font1", "BaseFont": "Helvetica", "Encoding": "MacRomanEncoding"}
# Content-stream operators use postfix notation (operands first, operator
# last) and take a fixed number of operands.
content_streams = [
    "BT /F1 10 Tf 1 1 Td (Hello world) Tj ET",  # BT..ET: begin..end text; /F1 references the font resource
    "15 25 m 15 35 l S 4 w [ 4 6 ] 0 d 15 25 m 40 25 l S [ ] 0 d 1 w",  # m, l, S: move, line, stroke; w: width; d: dasharray
    "100 0 0 100 0 0 cm /Im1 Do"]  # cm: set the transformation matrix; Do: draw object, images are always 1x1
image = {
    "Type": "XObject", "Subtype": "Image", "Width": 2, "Height": 3,
    "ColorSpace": "DeviceGray", "BitsPerComponent": 8, "Interpolate": False,
    "Filter": ["ASCII85Decode", "FlateDecode"],
    "stream": encode(b"\x22\x44\x00\x88\xff\xcc"),  # 2x3 pixels, one gray byte each
}
for content in content_streams:
    # Im1 is listed as a resource only on a page whose content uses "Do";
    # the other pages get an empty XObject dictionary.
    resources = {"Font": {"F1": font}, "XObject": {"Im1": image} if "Do" in content else dict()}
    page = {"Type": "Page", "Parent": root, "Contents": content, "Resources": resources}
    root["Kids"].append(page)
root["Count"] = len(root["Kids"])

# The xref offsets are sums of the character counts returned by f.write(), so
# every character must land in the file as exactly one byte: latin-1 maps one
# character to one byte and newline="\n" disables platform newline translation.
with open("out.pdf", "w", encoding="latin-1", newline="\n") as f:
    write(catalog, root, font, *root["Kids"], *content_streams, image, file=f)
	def format_dict(obj, refs=tuple()):
	    """Serialize a mapping as a PDF dictionary literal.

	    Each entry is rendered as ``/<key> <value>`` followed by a newline,
	    where the value text comes from ``format_value``. The entries are
	    wrapped in ``<< `` and ``>>``.

	    Args:
	        obj: Mapping of names to values.
	        refs: Passed through unchanged to ``format_value`` for every value.

	    Returns:
	        The dictionary as a string.
	    """
	    entries = ("/{} {}\n".format(key, format_value(value, refs)) for (key, value) in obj.items())
	    return "<< " + "".join(entries) + ">>"

	def format_value(value, refs=tuple()):
	    """Render a Python value in PDF object syntax.

	    Args:
	        value: The value to serialize.
	        refs: Sequence of objects that are written as indirect objects.
	            If ``value`` is found in it (by equality), an indirect
	            reference ``N 0 R`` is returned, N being the 1-based position
	            in ``refs``.

	    Returns:
	        str: ``dict`` -> dictionary (via ``format_dict``), ``list`` ->
	        array, ``bool`` -> ``true``/``false``, ``int``/``float`` ->
	        number, anything else -> a PDF name (``/value``).
	    """
	    if value in refs:
	        return "{} 0 R".format(refs.index(value) + 1)
	    elif type(value) is dict:
	        return format_dict(value, refs)
	    elif type(value) is list:
	        return "[ " + " ".join(format_value(item, refs) for item in value) + " ]"
	    elif type(value) is int:
	        return str(value)
	    elif type(value) is float:
	        text = str(value)
	        if "e" in text:
	            # PDF reals have no exponent form; spell the number out.
	            text = "{:.10f}".format(value).rstrip("0").rstrip(".")
	        return text
	    elif type(value) is bool:
	        return "true" if value else "false"
	    else:
	        return "/{}".format(value)  # this script can output only PDF names, no strings

	def write_object(index, obj, refs, f, stream=None):
	    """Write one indirect object (``N 0 obj`` ... ``endobj``) to ``f``.

	    Args:
	        index: Object number to write in the ``N 0 obj`` header.
	        obj: Either a dict (optionally carrying its payload under the
	            ``"stream"`` key) or any non-dict value, which is treated as
	            the stream payload of an otherwise empty dictionary.
	        refs: Passed to ``format_dict``.
	        f: Object with a ``write`` method that returns the amount written.
	        stream: Payload used when ``obj`` is a dict without ``"stream"``.

	    Returns:
	        The sum of the values returned by every ``f.write`` call made here.
	    """
	    if type(obj) is not dict:
	        stream, attributes = obj, dict()
	    else:
	        # Shallow copy so the caller's dict is left untouched.
	        attributes = dict(obj)
	        if "stream" in attributes:
	            stream = attributes.pop("stream")
	    # An empty str/bytes payload still counts as a stream.
	    has_stream = isinstance(stream, (str, bytes)) or bool(stream)
	    if has_stream:
	        attributes["Length"] = len(stream)

	    byte_count = f.write("{} 0 obj\n".format(index))
	    byte_count += f.write(format_dict(attributes, refs))
	    if has_stream:
	        byte_count += f.write("\nstream\n")
	        byte_count += f.write(stream)
	        byte_count += f.write("\nendstream")
	    return byte_count + f.write("\nendobj\n")

	def write(*data, file):
	    """Write ``data`` as a complete PDF 1.4 document to ``file``.

	    The objects are numbered 1..n in argument order and written with
	    ``write_object``; the cross-reference table, trailer and
	    ``startxref`` footer follow.

	    Args:
	        *data: The indirect objects. One of the dicts must have
	            ``"Type": "Catalog"``; it becomes the trailer's ``Root``.
	        file: Keyword-only. Object whose ``write`` returns the amount
	            written; xref offsets are sums of these return values.

	    Raises:
	        ValueError: If no catalog dict is among ``data``; nothing has
	            been written at that point.
	    """
	    # Skip non-dict entries (raw streams) while searching for the catalog.
	    catalog = next(
	        (ob for ob in data if isinstance(ob, dict) and ob.get("Type") == "Catalog"),
	        None,
	    )
	    if catalog is None:
	        raise ValueError("no object with Type 'Catalog' among the objects to write")

	    xref = list()
	    position = file.write("%PDF-1.4\n")
	    for index, obj in enumerate(data, 1):
	        xref.append(position)
	        position += write_object(index, obj, data, file)
	    xref_pos = position

	    # One extra entry: object 0, the head of the free list.
	    size = len(xref) + 1
	    file.write("xref\n0 {}\n".format(size))
	    # Entries are 20 bytes each, so a lone LF is preceded by a space; the
	    # free head entry has generation number 65535.
	    file.write("{:010} {:05} f \n".format(0, 65535))
	    for offset in xref:
	        file.write("{:010} {:05} n \n".format(offset, 0))
	    file.write("trailer\n")
	    file.write(format_dict({"Size": size, "Root": catalog}, data))
	    file.write("\nstartxref\n{}\n%%EOF\n".format(xref_pos))

	def encode(data):
	    """Compress ``data`` with zlib and return it as ASCII85 text.

	    Objects that have an ``encode`` method (such as ``str``) are converted
	    with ``data.encode()`` first. The Adobe-framed, 250-column-wrapped
	    output loses its first two characters (the opening ``<~``); the
	    closing marker is kept.

	    Returns:
	        str: The encoded text.
	    """
	    from base64 import a85encode
	    from zlib import compress
	    if hasattr(data, "encode"):
	        data = data.encode()
	    return a85encode(compress(data), adobe=True, wrapcol=250)[2:].decode()

	# Document structure: catalog -> page tree root -> one page per content stream.
	root = {"Type": "Pages", "MediaBox": [0, 0, 595, 842], "Kids": list()}
	catalog = {"Type": "Catalog", "Pages": root}
	font = {"Type": "Font", "Subtype": "Type1", "Name": "Font1", "BaseFont": "Helvetica", "Encoding": "MacRomanEncoding"}
	# Content-stream operators use postfix notation (operands first, operator
	# last) and take a fixed number of operands.
	content_streams = [
	    "BT /F1 10 Tf 1 1 Td (Hello world) Tj ET",  # BT..ET: begin..end text; /F1 references the font resource
	    "15 25 m 15 35 l S 4 w [ 4 6 ] 0 d 15 25 m 40 25 l S [ ] 0 d 1 w",  # m, l, S: move, line, stroke; w: width; d: dasharray
	    "100 0 0 100 0 0 cm /Im1 Do"]  # cm: set the transformation matrix; Do: draw object, images are always 1x1
	image = {"Type": "XObject", "Subtype": "Image", "Width": 2, "Height": 3, "ColorSpace": "DeviceGray", "BitsPerComponent": 8, "Interpolate": False, "Filter": ["ASCII85Decode", "FlateDecode"], "stream": encode(b"\x22\x44\x00\x88\xff\xcc")}
	for content in content_streams:
	    # Im1 is listed as a resource only on a page whose content uses "Do".
	    resources = {"Font": {"F1": font}, "XObject": {"Im1": image} if "Do" in content else dict()}
	    page = {"Type": "Page", "Parent": root, "Contents": content, "Resources": resources}
	    root["Kids"].append(page)
	root["Count"] = len(root["Kids"])

	# One character must become exactly one byte for the xref offsets to be
	# right: latin-1 encoding, no platform newline translation.
	with open("out.pdf", "w", encoding="latin-1", newline="\n") as f:
	    # Pages and content streams are unpacked so each becomes its own object.
	    write(catalog, root, font, *root["Kids"], *content_streams, image, file=f)