sekika/concatepdf.py

## concatepdf.py
#!/usr/bin/env python3
"""Concatenate PDF images with multiple images on a single page

This is a Python script designed to concatenate PDF images into
a single file. The script arranges multiple PDF images both
horizontally and vertically on a single page, allowing for
customizable image counts along each axis. This results in
multiple pages being generated.

Install the required library with the following command:

  python3 -m pip install argparse PyPDF3

After installation, place this script in your system's PATH and
make it executable. You can rename it to "concatepdf" for
convenience. For instance, if you have several PDF image files in
the current directory, execute the following command:

  concatepdf -i *.pdf -o output/output.pdf -c 2 -r 3

In this example, the image files will be concatenated into
"output/output.pdf," with 2 images arranged horizontally and 3
images vertically on each page, resulting in 6 images per page.
To view available command-line options, run:

  concatepdf -h

If you prefer to use the script within another Python script, you
can utilize the concat_pdf_pages function.

Author: Katsutoshi Seki
Website: https://github.com/sekika
Script URL: https://gist.github.com/sekika/1e8811868cebeca6c3443c69849929db
Japanese explanation: https://sekika.github.io/2023/08/09/concatepdf/
License: MIT license
"""

# Help text passed to argparse.ArgumentParser in main().
DESCRIPTION = 'Arrange multiple PDF images both horizontally and vertically within a single page, resulting in multiple pages'
# Input-count threshold: concat_pdf_pages() hands over to use_temp_files()
# when it receives more than max(MAX_OPEN_FILE, col*row) input files, and
# use_temp_files() derives its batch size from this value.
MAX_OPEN_FILE = 100


def main(argv=None):
    """Parse the command-line options and run concat_pdf_pages.

    Input
        argv: List of argument strings to parse. When None (default),
            argparse reads the arguments from sys.argv.

    Return
        nothing
    """
    import argparse

    def positive_int(text):
        # A grid needs at least one column and one row; rejecting other
        # values here gives the user a usage error instead of a failure
        # (or an empty result) further down.
        value = int(text)
        if value < 1:
            raise argparse.ArgumentTypeError(
                "must be a positive integer, got {0}".format(text))
        return value

    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument("-i", "--input", type=str, nargs="+",
                        required=True, help="Input files")
    parser.add_argument("-o", "--output", type=str,
                        required=True, help="Output file")
    parser.add_argument("-c", "--columns", type=positive_int, default=1,
                        help="Numbers of columns in a page")
    parser.add_argument("-r", "--rows", type=positive_int, default=1,
                        help="Numbers of rows in a page")
    args = parser.parse_args(argv)
    concat_pdf_pages(args.input, args.output, args.columns, args.rows)


def concat_pdf_pages(input_files, output_file, col, row):
    """Arrange multiple PDF images both horizontally and vertically within a single page, resulting in multiple pages

    The first page of every input file is used. Images fill each output
    page row by row, col images per row and row rows per page; the last
    row and the last page may hold fewer images. When there are more input
    files than max(MAX_OPEN_FILE, col*row), the work is delegated to
    use_temp_files.

    Input
        input_files: File name of input files
        output_file: File name of output file
        col: Columns in a page (1 or more)
        row: Rows in a page (1 or more)

    Return
        nothing

    Raise
        ValueError: if col or row is smaller than 1
    """
    # Validate first: a zero product breaks the range() step below and a
    # negative one would silently produce an empty output file.
    if col < 1 or row < 1:
        raise ValueError(
            "col and row must be 1 or more, got col={0}, row={1}".format(
                col, row))
    from contextlib import ExitStack
    from PyPDF3 import PdfFileWriter, PdfFileReader
    per_page = col * row
    if len(input_files) > max(MAX_OPEN_FILE, per_page):
        use_temp_files(input_files, output_file, col, row)
        return
    output = PdfFileWriter()
    # The input files stay open until the output has been written
    # (presumably PyPDF3 still reads from the source streams at write time
    # -- confirm), and ExitStack then closes all of them, which the
    # previous implementation never did.
    with ExitStack() as stack:
        for start in range(0, len(input_files), per_page):
            images = [
                PdfFileReader(stack.enter_context(open(name, "rb")),
                              strict=False).getPage(0)
                for name in input_files[start:start + per_page]
            ]
            # Build each row horizontally, then stack the rows vertically.
            rows = [concat_pdf(images[j:j + col], horizontal=True)
                    for j in range(0, len(images), col)]
            output.addPage(concat_pdf(rows))
        with open(output_file, "wb") as out:
            output.write(out)


def use_temp_files(input_files, output_file, col, row):
    """Same as concat_pdf_pages but using temporary files
    to avoid error for opening too many files.

    concat_pdf_pages calls this function when it receives more input files
    than max(MAX_OPEN_FILE, col*row). The inputs are processed in batches
    that hold a whole number of pages; each batch is written to a
    temporary file tmp_<n>_<name> in the same directory as output_file,
    the temporary files are merged into output_file, and they are removed
    afterwards (also when an error occurs).

    Input
        input_files: File name of input files
        output_file: File name of output file
        col: Columns in a page (1 or more)
        row: Rows in a page (1 or more)

    Return
        nothing

    Raise
        ValueError: if col or row is smaller than 1
    """
    # A zero product would divide by zero below; a negative one would
    # yield no batches and an empty output file.
    if col < 1 or row < 1:
        raise ValueError(
            "col and row must be 1 or more, got col={0}, row={1}".format(
                col, row))
    import os
    import PyPDF3
    total_files = len(input_files)
    pdf_in_page = col*row
    # Batch size: as many whole pages as fit into MAX_OPEN_FILE images, but
    # at least one page. A batch therefore never exceeds
    # max(MAX_OPEN_FILE, col*row), so concat_pdf_pages handles it directly
    # instead of calling back into this function.
    pdf_in_file = max((MAX_OPEN_FILE // pdf_in_page), 1) * pdf_in_page
    num_files = (total_files - 1) // pdf_in_file + 1
    # Prefix only the file name, not the whole path: 'tmp_0_' + 'out/x.pdf'
    # would point into a directory 'tmp_0_out' that does not exist.
    out_dir, out_name = os.path.split(output_file)
    print('As numbers of pdf ({0}) exceeds {1}, temporary files tmp_**_{2} are created.'.format(
        total_files, MAX_OPEN_FILE, out_name))
    tmp_files = []
    try:
        for i in range(num_files):
            tmp_file = os.path.join(out_dir, 'tmp_'+str(i)+'_'+out_name)
            tmp_files.append(tmp_file)
            print('Creating ' + tmp_file + '\r', end='')
            begin = pdf_in_file * i
            end = min(total_files, pdf_in_file * (i+1))
            concat_pdf_pages(input_files[begin:end], tmp_file, col, row)
        print('Creating ' + output_file + '         \r', end='')
        merger = PyPDF3.PdfFileMerger()
        try:
            for file in tmp_files:
                merger.append(file)
            merger.write(output_file)
        finally:
            # Close the merger before the temporary files are deleted.
            merger.close()
    finally:
        for file in tmp_files:
            # The newest temporary file may not exist if its batch failed.
            if os.path.exists(file):
                os.remove(file)
    print('{0} was created and temporary files were removed.'.format(output_file))


def concat_pdf(pages, horizontal=False):
    """Concatenate PDF vertically or horizontally

    Horizontally the pages are placed left to right along the bottom edge;
    vertically they are placed top to bottom along the left edge. The new
    page is as large as the sum of the page sizes along the chosen axis
    and the largest page size along the other axis.

    Input
        pages: Non-empty list of PDF objects of PyPDF3
        horizontal: Align horizontally if set true, vertically otherwise

    Return
        PDF object of PyPDF3

    Raise
        ValueError: if pages is empty
    """
    if not pages:
        raise ValueError("concat_pdf requires at least one page")
    from PyPDF3.pdf import PageObject
    # NOTE(review): sizes are taken from mediaBox.upperRight, which equals
    # the page width/height only when the box starts at the origin --
    # confirm for the intended inputs.
    widths = [page.mediaBox.upperRight[0] for page in pages]
    heights = [page.mediaBox.upperRight[1] for page in pages]
    if horizontal:
        total_width, total_height = sum(widths), max(heights)
    else:
        total_width, total_height = max(widths), sum(heights)
    new_page = PageObject.createBlankPage(None, total_width, total_height)
    if horizontal:
        # The first page sits at the origin and needs no translation.
        new_page.mergePage(pages[0])
        offset = widths[0]
        for page, width in zip(pages[1:], widths[1:]):
            new_page.mergeTranslatedPage(page, offset, 0)
            offset += width
    else:
        # Start at the top edge and move down by each page's height.
        offset = total_height
        for page, height in zip(pages, heights):
            offset -= height
            new_page.mergeTranslatedPage(page, 0, offset)
    return new_page


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""Concatenate PDF images with multiple images on a single page

	This is a Python script designed to concatenate PDF images into
	a single file. The script arranges multiple PDF images both
	horizontally and vertically on a single page, allowing for
	customizable image counts along each axis. This results in
	multiple pages being generated.

	Install the required library with the following command:

	python3 -m pip install argparse PyPDF3

	After installation, place this script in your system's PATH and
	make it executable. You can rename it to "concatepdf" for
	convenience. For instance, if you have several PDF image files in
	the current directory, execute the following command:

	concatepdf -i *.pdf -o output/output.pdf -c 2 -r 3

	In this example, the image files will be concatenated into
	"output/output.pdf," with 2 images arranged horizontally and 3
	images vertically on each page, resulting in 6 images per page.
	To view available command-line options, run:

	concatepdf -h

	If you prefer to use the script within another Python script, you
	can utilize the concat_pdf_pages function.

	Author: Katsutoshi Seki
	Website: https://github.com/sekika
	Script URL: https://gist.github.com/sekika/1e8811868cebeca6c3443c69849929db
	Japanese explanation: https://sekika.github.io/2023/08/09/concatepdf/
	License: MIT license
	"""

	# Help text passed to argparse.ArgumentParser in main().
	DESCRIPTION = 'Arrange multiple PDF images both horizontally and vertically within a single page, resulting in multiple pages'
	# Input-count threshold: concat_pdf_pages() hands over to use_temp_files()
	# when it receives more than max(MAX_OPEN_FILE, col*row) input files, and
	# use_temp_files() derives its batch size from this value.
	MAX_OPEN_FILE = 100


	def main(argv=None):
	    """Parse the command-line options and run concat_pdf_pages.

	    Input
	        argv: List of argument strings to parse. When None (default),
	            argparse reads the arguments from sys.argv.

	    Return
	        nothing
	    """
	    import argparse

	    def positive_int(text):
	        # A grid needs at least one column and one row; rejecting other
	        # values here gives the user a usage error instead of a failure
	        # (or an empty result) further down.
	        value = int(text)
	        if value < 1:
	            raise argparse.ArgumentTypeError(
	                "must be a positive integer, got {0}".format(text))
	        return value

	    parser = argparse.ArgumentParser(description=DESCRIPTION)
	    parser.add_argument("-i", "--input", type=str, nargs="+",
	                        required=True, help="Input files")
	    parser.add_argument("-o", "--output", type=str,
	                        required=True, help="Output file")
	    parser.add_argument("-c", "--columns", type=positive_int, default=1,
	                        help="Numbers of columns in a page")
	    parser.add_argument("-r", "--rows", type=positive_int, default=1,
	                        help="Numbers of rows in a page")
	    args = parser.parse_args(argv)
	    concat_pdf_pages(args.input, args.output, args.columns, args.rows)


	def concat_pdf_pages(input_files, output_file, col, row):
	    """Arrange multiple PDF images both horizontally and vertically within a single page, resulting in multiple pages

	    The first page of every input file is used. Images fill each output
	    page row by row, col images per row and row rows per page; the last
	    row and the last page may hold fewer images. When there are more input
	    files than max(MAX_OPEN_FILE, col*row), the work is delegated to
	    use_temp_files.

	    Input
	        input_files: File name of input files
	        output_file: File name of output file
	        col: Columns in a page (1 or more)
	        row: Rows in a page (1 or more)

	    Return
	        nothing

	    Raise
	        ValueError: if col or row is smaller than 1
	    """
	    # Validate first: a zero product breaks the range() step below and a
	    # negative one would silently produce an empty output file.
	    if col < 1 or row < 1:
	        raise ValueError(
	            "col and row must be 1 or more, got col={0}, row={1}".format(
	                col, row))
	    from contextlib import ExitStack
	    from PyPDF3 import PdfFileWriter, PdfFileReader
	    per_page = col * row
	    if len(input_files) > max(MAX_OPEN_FILE, per_page):
	        use_temp_files(input_files, output_file, col, row)
	        return
	    output = PdfFileWriter()
	    # The input files stay open until the output has been written
	    # (presumably PyPDF3 still reads from the source streams at write time
	    # -- confirm), and ExitStack then closes all of them.
	    with ExitStack() as stack:
	        for start in range(0, len(input_files), per_page):
	            images = [
	                PdfFileReader(stack.enter_context(open(name, "rb")),
	                              strict=False).getPage(0)
	                for name in input_files[start:start + per_page]
	            ]
	            # Build each row horizontally, then stack the rows vertically.
	            rows = [concat_pdf(images[j:j + col], horizontal=True)
	                    for j in range(0, len(images), col)]
	            output.addPage(concat_pdf(rows))
	        with open(output_file, "wb") as out:
	            output.write(out)


	def use_temp_files(input_files, output_file, col, row):
	    """Same as concat_pdf_pages but using temporary files
	    to avoid error for opening too many files.

	    concat_pdf_pages calls this function when it receives more input files
	    than max(MAX_OPEN_FILE, col*row). The inputs are processed in batches
	    that hold a whole number of pages; each batch is written to a
	    temporary file tmp_<n>_<name> in the same directory as output_file,
	    the temporary files are merged into output_file, and they are removed
	    afterwards (also when an error occurs).

	    Input
	        input_files: File name of input files
	        output_file: File name of output file
	        col: Columns in a page (1 or more)
	        row: Rows in a page (1 or more)

	    Return
	        nothing

	    Raise
	        ValueError: if col or row is smaller than 1
	    """
	    # A zero product would divide by zero below; a negative one would
	    # yield no batches and an empty output file.
	    if col < 1 or row < 1:
	        raise ValueError(
	            "col and row must be 1 or more, got col={0}, row={1}".format(
	                col, row))
	    import os
	    import PyPDF3
	    total_files = len(input_files)
	    pdf_in_page = col*row
	    # Batch size: as many whole pages as fit into MAX_OPEN_FILE images, but
	    # at least one page. A batch therefore never exceeds
	    # max(MAX_OPEN_FILE, col*row), so concat_pdf_pages handles it directly
	    # instead of calling back into this function.
	    pdf_in_file = max((MAX_OPEN_FILE // pdf_in_page), 1) * pdf_in_page
	    num_files = (total_files - 1) // pdf_in_file + 1
	    # Prefix only the file name, not the whole path: 'tmp_0_' + 'out/x.pdf'
	    # would point into a directory 'tmp_0_out' that does not exist.
	    out_dir, out_name = os.path.split(output_file)
	    print('As numbers of pdf ({0}) exceeds {1}, temporary files tmp_**_{2} are created.'.format(
	        total_files, MAX_OPEN_FILE, out_name))
	    tmp_files = []
	    try:
	        for i in range(num_files):
	            tmp_file = os.path.join(out_dir, 'tmp_'+str(i)+'_'+out_name)
	            tmp_files.append(tmp_file)
	            print('Creating ' + tmp_file + '\r', end='')
	            begin = pdf_in_file * i
	            end = min(total_files, pdf_in_file * (i+1))
	            concat_pdf_pages(input_files[begin:end], tmp_file, col, row)
	        print('Creating ' + output_file + ' \r', end='')
	        merger = PyPDF3.PdfFileMerger()
	        try:
	            for file in tmp_files:
	                merger.append(file)
	            merger.write(output_file)
	        finally:
	            # Close the merger before the temporary files are deleted.
	            merger.close()
	    finally:
	        for file in tmp_files:
	            # The newest temporary file may not exist if its batch failed.
	            if os.path.exists(file):
	                os.remove(file)
	    print('{0} was created and temporary files were removed.'.format(output_file))


	def concat_pdf(pages, horizontal=False):
	    """Concatenate PDF vertically or horizontally

	    Horizontally the pages are placed left to right along the bottom edge;
	    vertically they are placed top to bottom along the left edge. The new
	    page is as large as the sum of the page sizes along the chosen axis
	    and the largest page size along the other axis.

	    Input
	        pages: Non-empty list of PDF objects of PyPDF3
	        horizontal: Align horizontally if set true, vertically otherwise

	    Return
	        PDF object of PyPDF3

	    Raise
	        ValueError: if pages is empty
	    """
	    if not pages:
	        raise ValueError("concat_pdf requires at least one page")
	    from PyPDF3.pdf import PageObject
	    # NOTE(review): sizes are taken from mediaBox.upperRight, which equals
	    # the page width/height only when the box starts at the origin --
	    # confirm for the intended inputs.
	    widths = [page.mediaBox.upperRight[0] for page in pages]
	    heights = [page.mediaBox.upperRight[1] for page in pages]
	    if horizontal:
	        total_width, total_height = sum(widths), max(heights)
	    else:
	        total_width, total_height = max(widths), sum(heights)
	    new_page = PageObject.createBlankPage(None, total_width, total_height)
	    if horizontal:
	        # The first page sits at the origin and needs no translation.
	        new_page.mergePage(pages[0])
	        offset = widths[0]
	        for page, width in zip(pages[1:], widths[1:]):
	            new_page.mergeTranslatedPage(page, offset, 0)
	            offset += width
	    else:
	        # Start at the top edge and move down by each page's height.
	        offset = total_height
	        for page, height in zip(pages, heights):
	            offset -= height
	            new_page.mergeTranslatedPage(page, 0, offset)
	    return new_page


	# Run the command-line interface only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()