Skip to content

Instantly share code, notes, and snippets.

@arsane
Created August 9, 2010 09:13
Show Gist options
  • Save arsane/515181 to your computer and use it in GitHub Desktop.
Save arsane/515181 to your computer and use it in GitHub Desktop.
a small script used to split each scanned pdf page into two images, and generate one pdf file.
#!/usr/bin/python
# For scanned pages, this simple script
# splits each page and dumps it into two png files.
#
# These pages can then be combined into one pdf
# file again.
#
# To merge the png files into one pdf, I prefer to
# load the png files with f-spot and print the images
# into one pdf file (set the printer to use scale, and
# one image per page).
import poppler
import gtk
import urllib
import sys
# Pixel width used by toimags for each rendered half-page image; it is also
# the numerator of the render scale (width / page width).
width = 2000
# split pages
def toimags(filename, prefix='test'):
    """Split every page of a PDF into two PNG images.

    Each page is rendered at the module-level ``width`` (in pixels) and cut
    into two regions of equal height: one starting at y=0 and one starting at
    y=height. Both regions are rotated with ``rotate_simple(270)`` and saved
    in the current directory as ``<prefix>-NNN.png``. For page ``i`` the
    region starting at y=height gets index ``2*i`` and the region starting at
    y=0 gets index ``2*i + 1``.

    Args:
        filename: Path to the PDF file; may be relative or absolute.
        prefix: Leading part of the output file names. Defaults to ``'test'``,
            which reproduces the original ``test-NNN.png`` names.
    """
    import os  # local import: the file's import block does not provide os

    # A file:// URI needs an absolute path; with a relative one the first
    # path component would be read as the URI's host part.
    uri = 'file://%s' % urllib.pathname2url(os.path.abspath(filename))
    doc = poppler.document_new_from_file(uri, password=None)

    for i in range(doc.get_n_pages()):
        page = doc.get_page(i)
        size = page.get_size()
        page_width, page_height = size[0], size[1]
        scale = width / page_width

        # Pixel dimensions are passed where integers are expected, so
        # truncate the half-page height instead of handing over a float.
        height = int((width * page_height / page_width) * 0.5)

        first_half = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, True, 8,
                                    width, height)
        second_half = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, True, 8,
                                     width, height)

        page.render_to_pixbuf(src_x=0, src_y=0, src_width=width,
                              src_height=height,
                              scale=scale,
                              rotation=0, pixbuf=first_half)
        page.render_to_pixbuf(src_x=0, src_y=height, src_width=width,
                              src_height=height,
                              scale=scale,
                              rotation=0, pixbuf=second_half)

        # The region starting at y=height is written first (even index),
        # then the region starting at y=0 (odd index), as in the original.
        second_half.rotate_simple(270).save(
            '%s-%03d.png' % (prefix, 2 * i), 'png')
        first_half.rotate_simple(270).save(
            '%s-%03d.png' % (prefix, 2 * i + 1), 'png')
if __name__ == '__main__':
    # The script needs the path of the PDF to split; without it, print a
    # usage message and exit non-zero instead of failing with an IndexError.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <scanned.pdf>\n' % sys.argv[0])
        sys.exit(1)
    toimags(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment