Skip to content

Instantly share code, notes, and snippets.

@sekika
Last active August 10, 2023 14:28
Show Gist options
  • Save sekika/1e8811868cebeca6c3443c69849929db to your computer and use it in GitHub Desktop.
Save sekika/1e8811868cebeca6c3443c69849929db to your computer and use it in GitHub Desktop.
Concatenate PDF images with multiple images on a single page
#!/usr/bin/env python3
"""Concatenate PDF images with multiple images on a single page
This is a Python script designed to concatenate PDF images into
a single file. The script arranges multiple PDF images both
horizontally and vertically on a single page, allowing for
customizable image counts along each axis. This results in
multiple pages being generated.
Install the required libraries with the following command:
python3 -m pip install argparse PyPDF3
After installation, place this script in your system's PATH and
make it executable. You can rename it to "concatepdf" for
convenience. For instance, if you have several PDF image files in
the current directory, execute the following command:
concatepdf -i *.pdf -o output/output.pdf -c 2 -r 3
In this example, the image files will be concatenated into
"output/output.pdf," with 2 images arranged horizontally and 3
images vertically on each page, resulting in 6 images per page.
To view available command-line options, run:
concatepdf -h
If you prefer to use the script within another Python script, you
can utilize the concat_pdf_pages function.
Author: Katsutoshi Seki
Website: https://github.com/sekika
Script URL: https://gist.github.com/sekika/1e8811868cebeca6c3443c69849929db
Japanese explanation: https://sekika.github.io/2023/08/09/concatepdf/
License: MIT license
"""
# Text passed to the command-line argument parser in main() as its description.
DESCRIPTION = 'Arrange multiple PDF images both horizontally and vertically within a single page, resulting in multiple pages'
# Threshold on the number of input files: concat_pdf_pages() hands the work
# to use_temp_files() when the input count exceeds max(MAX_OPEN_FILE, col*row),
# and use_temp_files() uses it to size the chunk written to each temporary file.
MAX_OPEN_FILE = 100
def main():
    """Command-line entry point.

    Reads the input files, output file and grid size (columns and rows)
    from the command line and passes them to concat_pdf_pages.
    """
    import argparse

    # (flags, keyword options) for every command-line argument, in help order.
    option_table = (
        (("-i", "--input"),
         dict(type=str, nargs="+", required=True, help="Input files")),
        (("-o", "--output"),
         dict(type=str, required=True, help="Output file")),
        (("-c", "--columns"),
         dict(type=int, default=1, help="Numbers of columns in a page")),
        (("-r", "--rows"),
         dict(type=int, default=1, help="Numbers of rows in a page")),
    )

    cli = argparse.ArgumentParser(description=DESCRIPTION)
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()
    concat_pdf_pages(opts.input, opts.output, opts.columns, opts.rows)
def concat_pdf_pages(input_files, output_file, col, row):
    """Arrange multiple PDF images both horizontally and vertically within a single page, resulting in multiple pages

    The first page of every input file is used. Each output page is filled
    row by row (left to right, then top to bottom) with up to col*row
    images; the last output page may be only partially filled. When there
    are more input files than max(MAX_OPEN_FILE, col*row), the work is
    handed to use_temp_files.

    Input
        input_files: File names of input files
        output_file: File name of output file
        col: Columns in a page (1 or more)
        row: Rows in a page (1 or more)
    Return
        nothing
    Raises
        ValueError: If col or row is smaller than 1
    """
    # Reject an unusable grid up front; otherwise a zero gives an obscure
    # range() error and some negative values silently write an empty PDF.
    if col < 1 or row < 1:
        raise ValueError(
            'col and row must be at least 1 (got col={0}, row={1})'.format(
                col, row))

    from contextlib import ExitStack
    from PyPDF3 import PdfFileWriter, PdfFileReader

    per_page = col * row
    if len(input_files) > max(MAX_OPEN_FILE, per_page):
        use_temp_files(input_files, output_file, col, row)
        return

    output = PdfFileWriter()
    # Keep every input open until the output has been written (the page
    # objects may still need their source streams at that point), then let
    # the ExitStack close them all instead of leaking the file handles.
    with ExitStack() as stack:
        for i in range(0, len(input_files), per_page):
            images = [
                PdfFileReader(
                    stack.enter_context(open(name, "rb")),
                    strict=False).getPage(0)
                for name in input_files[i:i + per_page]
            ]
            # Build each row horizontally, then stack the rows vertically.
            rows = [
                concat_pdf(images[j:j + col], horizontal=True)
                for j in range(0, len(images), col)
            ]
            output.addPage(concat_pdf(rows))
        with open(output_file, "wb") as out:
            output.write(out)
def use_temp_files(input_files, output_file, col, row):
    """Same as concat_pdf_pages but using temporary files
    to avoid error for opening too many files.

    When there are more than MAX_OPEN_FILE input files, this function is
    called. The inputs are split into chunks that each hold a whole number
    of output pages, every chunk is written by concat_pdf_pages to a
    temporary PDF in the same directory as output_file, the temporary PDFs
    are merged into output_file, and the temporary PDFs are removed again
    (also when an error interrupts the process).

    Input
        input_files: File names of input files
        output_file: File name of output file
        col: Columns in a page
        row: Rows in a page
    Return
        nothing
    """
    import os
    import PyPDF3

    total_files = len(input_files)
    pdf_in_page = col * row
    # A chunk holds as many whole pages as fit into MAX_OPEN_FILE inputs,
    # but always at least one page.
    pdf_in_file = max((MAX_OPEN_FILE // pdf_in_page), 1) * pdf_in_page
    num_files = (total_files - 1) // pdf_in_file + 1

    # Put the temporary files next to the output file. Prefixing the whole
    # path (e.g. "tmp_0_output/output.pdf") would point into a directory
    # that does not exist whenever output_file contains a directory part.
    out_dir, out_name = os.path.split(output_file)
    print('As numbers of pdf ({0}) exceeds {1}, temporary files tmp_**_{2} are created.'.format(
        total_files, MAX_OPEN_FILE, out_name))

    tmp_files = []
    try:
        for i in range(num_files):
            tmp_file = os.path.join(
                out_dir, 'tmp_' + str(i) + '_' + out_name)
            tmp_files.append(tmp_file)
            print('Creating ' + tmp_file + '\r', end='')
            begin = pdf_in_file * i
            end = min(total_files, pdf_in_file * (i + 1))
            concat_pdf_pages(input_files[begin:end], tmp_file, col, row)

        print('Creating ' + output_file + ' \r', end='')
        merger = PyPDF3.PdfFileMerger()
        try:
            for file in tmp_files:
                merger.append(file)
            merger.write(output_file)
        finally:
            # Close the merger before the temporary files are deleted.
            merger.close()
    finally:
        # Remove whatever temporary files were created, even after a failure.
        for file in tmp_files:
            if os.path.exists(file):
                os.remove(file)
    print('{0} was created and temporary files were removed.'.format(output_file))
def concat_pdf(pages, horizontal=False):
    """Join several PDF pages into one page

    Input
        pages: List of PDF page objects of PyPDF3
        horizontal: Place the pages side by side (left to right) if true,
            stack them from top to bottom otherwise
    Return
        New PDF page object of PyPDF3 containing all the given pages
    """
    from PyPDF3.pdf import PageObject

    # The upper-right corner of each media box is used as (width, height).
    widths = [p.mediaBox.upperRight[0] for p in pages]
    heights = [p.mediaBox.upperRight[1] for p in pages]

    if horizontal:
        canvas = PageObject.createBlankPage(None, sum(widths), max(heights))
        # The first page sits at the origin; the rest follow to its right.
        canvas.mergePage(pages[0])
        x = widths[0]
        for page, width in zip(pages[1:], widths[1:]):
            canvas.mergeTranslatedPage(page, x, 0)
            x += width
        return canvas

    full_height = sum(heights)
    canvas = PageObject.createBlankPage(None, max(widths), full_height)
    # Start at the top edge and step down by each page's height.
    y = full_height
    for page, height in zip(pages, heights):
        y -= height
        canvas.mergeTranslatedPage(page, 0, y)
    return canvas
# Run the command-line interface only when this file is executed directly,
# not when it is imported to use concat_pdf_pages from another script.
if __name__ == "__main__":
    main()
@sekika
Copy link
Author

sekika commented Aug 9, 2023

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment