# derrickturk/pdfsplit.py

## pdfsplit.py
import sys
import os
import subprocess
import tempfile

# External tools this script shells out to. They are invoked by bare name,
# so they are looked up through the usual executable search path.
PDFINFO_BINARY = 'pdfinfo'
# Ghostscript's console executable is 'gswin64c' on Windows and 'gs' on
# other platforms.
GS_BINARY = 'gswin64c' if os.name == 'nt' else 'gs'

class ShellCmdError(Exception):
    """Raised when an external tool fails or its output cannot be used."""

def main(args):
    """Cut a PDF into a rows x columns grid and write the tiles to one PDF.

    Args:
        args: Full argument vector (normally ``sys.argv``):
            ``[program, pdf-file, rows, columns, output-file]``.

    Returns:
        Exit status for ``sys.exit``: 0 on success, 2 for a usage error
        (wrong argument count, non-integer or non-positive rows/columns),
        1 when the page size is not evenly divisible by the grid.

    Raises:
        ShellCmdError: Propagated from pdf_dimensions, pdf_segment or
            pdf_combine when an external tool fails.
    """
    usage = 'Usage: {0} pdf-file rows columns output-file'.format(
        args[0] if args else 'pdfsplit')

    if len(args) != 5:
        print(usage, file=sys.stderr)
        return 2

    try:
        rows = int(args[2])
        columns = int(args[3])
        # Zero would divide by zero below and negatives describe no grid.
        if rows < 1 or columns < 1:
            raise ValueError('rows and columns must be positive')
    except ValueError:
        print('Invalid dimension specification.', file=sys.stderr)
        print(usage, file=sys.stderr)
        return 2

    # pdf_dimensions returns (vertical, horizontal) sizes in points.
    size = pdf_dimensions(args[1])
    badsize = False
    if size[0] % rows != 0:
        print(('Vertical dimension ({0} pts) not evenly divisible ' +
            'by {1} rows!').format(size[0], rows), file=sys.stderr)
        badsize = True
    if size[1] % columns != 0:
        print(('Horizontal dimension ({0} pts) not evenly divisible ' +
            'by {1} columns!').format(size[1], columns), file=sys.stderr)
        badsize = True

    if badsize:
        return 1

    row_step = size[0] // rows
    column_step = size[1] // columns

    temporaries = []
    try:
        # One scratch file per tile. Only the path is needed (Ghostscript
        # writes to it), so each handle is closed immediately.
        for _ in range(rows * columns):
            scratch = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
            scratch.close()
            temporaries.append(scratch.name)

        # Tiles are produced and later combined in row-major order.
        for row in range(rows):
            for column in range(columns):
                pdf_segment(args[1], size, rows, columns, row_step, column_step,
                        row, column, temporaries[row * columns + column])

        pdf_combine(args[4], *temporaries)
    finally:
        # Best-effort cleanup: one failed removal must not leave the other
        # scratch files behind or hide an error raised above.
        for t in temporaries:
            try:
                os.remove(t)
            except OSError:
                pass

    return 0

def pdf_dimensions(filename):
    """Return the page size that ``pdfinfo`` reports for a PDF.

    Args:
        filename: Path of the PDF file to inspect.

    Returns:
        A tuple of two ints in points, ordered (vertical, horizontal),
        i.e. the two numbers of the "Page size: A x B pts" record swapped.

    Raises:
        ShellCmdError: If pdfinfo cannot be started or exits with an error,
            or if its output has no "Page size: A x B pts" record whose two
            values parse as integers (fractional sizes are rejected).
    """
    SIZE_LABEL = b'Page size:'
    SIZE_TERMINATOR = b'pts'
    SIZE_INFIX = b' x '
    BAD_RECORD = 'Valid page size record not found in pdfinfo output'

    try:
        pdf_info = subprocess.check_output([PDFINFO_BINARY, filename])
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: pdfinfo ran but failed; OSError: it could not
        # be launched at all (e.g. not installed).
        raise ShellCmdError(e) from e

    size_pos = pdf_info.find(SIZE_LABEL)
    if size_pos == -1:
        raise ShellCmdError('Page size record not found in pdfinfo output.')
    value_start = size_pos + len(SIZE_LABEL)
    term_pos = pdf_info.find(SIZE_TERMINATOR, value_start)
    if term_pos == -1:
        raise ShellCmdError(BAD_RECORD)
    size = pdf_info[value_start:term_pos].strip().split(SIZE_INFIX)
    if len(size) != 2:
        raise ShellCmdError(BAD_RECORD)
    try:
        # rows (vertical), columns (horizontal)
        size = (int(size[1]), int(size[0]))
    except ValueError:
        raise ShellCmdError(BAD_RECORD)

    return size

def pdf_segment(filename, size, rows, columns, row_step, column_step,
        i, j, output_filename):
    """Write grid cell (row ``i``, column ``j``) of a PDF to its own file.

    Runs Ghostscript's pdfwrite device with a ``-g`` geometry computed from
    ``size`` and the grid counts, and a PageOffset computed from the cell
    indices and step sizes. Ghostscript's own output is discarded.

    Raises:
        ShellCmdError: If Ghostscript exits with a non-zero status.
    """
    # NOTE(review): the factor 10 presumably converts points to device
    # pixels at pdfwrite's default resolution -- confirm.
    geometry = ''.join(['-g', str(size[1] * 10 // columns),
                        'x', str(size[0] * 10 // rows)])
    x_offset = str(-j * column_step)
    y_offset = str((i + 1 - rows) * row_step)
    page_setup = ''.join(['<</PageOffset [', x_offset, ' ', y_offset,
                          ']>> setpagedevice'])
    command = [GS_BINARY, '-o', output_filename, '-sDEVICE=pdfwrite',
               geometry, '-c', page_setup, '-f', filename]

    status = subprocess.call(command,
                             stdout=subprocess.DEVNULL,
                             stderr=subprocess.DEVNULL)
    if status != 0:
        raise ShellCmdError('Error using gs to segment file.')

def pdf_combine(output_filename, *filenames):
    """Merge several PDF files into one using Ghostscript's pdfwrite device.

    Args:
        output_filename: Path of the combined PDF to write.
        *filenames: Input PDF paths, handed to Ghostscript in this order.

    Raises:
        ShellCmdError: If Ghostscript exits with a non-zero status.
    """
    command = [GS_BINARY, '-o', output_filename, '-sDEVICE=pdfwrite']
    command.extend(filenames)
    ret = subprocess.call(command,
                          stdout=subprocess.DEVNULL,
                          stderr=subprocess.DEVNULL)
    if ret != 0:
        # Name the step that actually failed (this is not the segment step).
        raise ShellCmdError('Error using gs to combine files.')

if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv))
	import sys
	import os
	import subprocess
	import tempfile

	# External tools this script shells out to. They are invoked by bare name,
	# so they are looked up through the usual executable search path.
	PDFINFO_BINARY = 'pdfinfo'
	# Ghostscript's console executable is 'gswin64c' on Windows and 'gs' on
	# other platforms.
	GS_BINARY = 'gswin64c' if os.name == 'nt' else 'gs'

	class ShellCmdError(Exception):
	    """Raised when an external tool fails or its output cannot be used."""

	def main(args):
	    """Cut a PDF into a rows x columns grid and write the tiles to one PDF.

	    Args:
	        args: Full argument vector (normally ``sys.argv``):
	            ``[program, pdf-file, rows, columns, output-file]``.

	    Returns:
	        Exit status for ``sys.exit``: 0 on success, 2 for a usage error
	        (wrong argument count, non-integer or non-positive rows/columns),
	        1 when the page size is not evenly divisible by the grid.

	    Raises:
	        ShellCmdError: Propagated from pdf_dimensions, pdf_segment or
	            pdf_combine when an external tool fails.
	    """
	    usage = 'Usage: {0} pdf-file rows columns output-file'.format(
	        args[0] if args else 'pdfsplit')

	    if len(args) != 5:
	        print(usage, file=sys.stderr)
	        return 2

	    try:
	        rows = int(args[2])
	        columns = int(args[3])
	        # Zero would divide by zero below and negatives describe no grid.
	        if rows < 1 or columns < 1:
	            raise ValueError('rows and columns must be positive')
	    except ValueError:
	        print('Invalid dimension specification.', file=sys.stderr)
	        print(usage, file=sys.stderr)
	        return 2

	    # pdf_dimensions returns (vertical, horizontal) sizes in points.
	    size = pdf_dimensions(args[1])
	    badsize = False
	    if size[0] % rows != 0:
	        print(('Vertical dimension ({0} pts) not evenly divisible ' +
	            'by {1} rows!').format(size[0], rows), file=sys.stderr)
	        badsize = True
	    if size[1] % columns != 0:
	        print(('Horizontal dimension ({0} pts) not evenly divisible ' +
	            'by {1} columns!').format(size[1], columns), file=sys.stderr)
	        badsize = True

	    if badsize:
	        return 1

	    row_step = size[0] // rows
	    column_step = size[1] // columns

	    temporaries = []
	    try:
	        # One scratch file per tile. Only the path is needed (Ghostscript
	        # writes to it), so each handle is closed immediately.
	        for _ in range(rows * columns):
	            scratch = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
	            scratch.close()
	            temporaries.append(scratch.name)

	        # Tiles are produced and later combined in row-major order.
	        for row in range(rows):
	            for column in range(columns):
	                pdf_segment(args[1], size, rows, columns, row_step, column_step,
	                        row, column, temporaries[row * columns + column])

	        pdf_combine(args[4], *temporaries)
	    finally:
	        # Best-effort cleanup: one failed removal must not leave the other
	        # scratch files behind or hide an error raised above.
	        for t in temporaries:
	            try:
	                os.remove(t)
	            except OSError:
	                pass

	    return 0

	def pdf_dimensions(filename):
	    """Return the page size that ``pdfinfo`` reports for a PDF.

	    Args:
	        filename: Path of the PDF file to inspect.

	    Returns:
	        A tuple of two ints in points, ordered (vertical, horizontal),
	        i.e. the two numbers of the "Page size: A x B pts" record swapped.

	    Raises:
	        ShellCmdError: If pdfinfo cannot be started or exits with an error,
	            or if its output has no "Page size: A x B pts" record whose two
	            values parse as integers (fractional sizes are rejected).
	    """
	    SIZE_LABEL = b'Page size:'
	    SIZE_TERMINATOR = b'pts'
	    SIZE_INFIX = b' x '
	    BAD_RECORD = 'Valid page size record not found in pdfinfo output'

	    try:
	        pdf_info = subprocess.check_output([PDFINFO_BINARY, filename])
	    except (subprocess.CalledProcessError, OSError) as e:
	        # CalledProcessError: pdfinfo ran but failed; OSError: it could not
	        # be launched at all (e.g. not installed).
	        raise ShellCmdError(e) from e

	    size_pos = pdf_info.find(SIZE_LABEL)
	    if size_pos == -1:
	        raise ShellCmdError('Page size record not found in pdfinfo output.')
	    value_start = size_pos + len(SIZE_LABEL)
	    term_pos = pdf_info.find(SIZE_TERMINATOR, value_start)
	    if term_pos == -1:
	        raise ShellCmdError(BAD_RECORD)
	    size = pdf_info[value_start:term_pos].strip().split(SIZE_INFIX)
	    if len(size) != 2:
	        raise ShellCmdError(BAD_RECORD)
	    try:
	        # rows (vertical), columns (horizontal)
	        size = (int(size[1]), int(size[0]))
	    except ValueError:
	        raise ShellCmdError(BAD_RECORD)

	    return size

	def pdf_segment(filename, size, rows, columns, row_step, column_step,
	        i, j, output_filename):
	    """Write grid cell (row ``i``, column ``j``) of a PDF to its own file.

	    Runs Ghostscript's pdfwrite device with a ``-g`` geometry computed from
	    ``size`` and the grid counts, and a PageOffset computed from the cell
	    indices and step sizes. Ghostscript's own output is discarded.

	    Raises:
	        ShellCmdError: If Ghostscript exits with a non-zero status.
	    """
	    # NOTE(review): the factor 10 presumably converts points to device
	    # pixels at pdfwrite's default resolution -- confirm.
	    ret = subprocess.call([GS_BINARY, '-o', output_filename, '-sDEVICE=pdfwrite',
	        '-g' + str(size[1] * 10 // columns) + 'x' + str(size[0] * 10 // rows),
	        '-c', '<</PageOffset [' +
	        str(-j * column_step) + ' ' + str((i + 1 - rows) * row_step) +
	        ']>> setpagedevice',
	        '-f', filename],
	        stdout=subprocess.DEVNULL,
	        stderr=subprocess.DEVNULL)
	    if ret != 0:
	        raise ShellCmdError('Error using gs to segment file.')

	def pdf_combine(output_filename, *filenames):
	    """Merge several PDF files into one using Ghostscript's pdfwrite device.

	    Args:
	        output_filename: Path of the combined PDF to write.
	        *filenames: Input PDF paths, handed to Ghostscript in this order.

	    Raises:
	        ShellCmdError: If Ghostscript exits with a non-zero status.
	    """
	    command = [GS_BINARY, '-o', output_filename, '-sDEVICE=pdfwrite']
	    command.extend(filenames)
	    ret = subprocess.call(command,
	                          stdout=subprocess.DEVNULL,
	                          stderr=subprocess.DEVNULL)
	    if ret != 0:
	        # Name the step that actually failed (this is not the segment step).
	        raise ShellCmdError('Error using gs to combine files.')

	if __name__ == '__main__':
	    # Use main()'s return value as the process exit status.
	    sys.exit(main(sys.argv))