Last active
August 10, 2023 14:28
-
-
Save sekika/1e8811868cebeca6c3443c69849929db to your computer and use it in GitHub Desktop.
Concatenate PDF images with multiple images on a single page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Concatenate PDF images with multiple images on a single page | |
This is a Python script designed to concatenate PDF images into | |
a single file. The script arranges multiple PDF images both | |
horizontally and vertically on a single page, allowing for | |
customizable image counts along each axis. This results in | |
multiple pages being generated. | |
Install the required libraries with the following command:
python3 -m pip install argparse PyPDF3 | |
After installation, place this script in your system's PATH and | |
make it executable. You can rename it to "concatepdf" for
convenience. For instance, if you have several PDF image files in | |
the current directory, execute the following command: | |
concatepdf -i *.pdf -o output/output.pdf -c 2 -r 3 | |
In this example, the image files will be concatenated into | |
"output/output.pdf," with 2 images arranged horizontally and 3 | |
images vertically on each page, resulting in 6 images per page. | |
To view available command-line options, run: | |
concatepdf -h | |
If you prefer to use the script within another Python script, you | |
can utilize the concat_pdf_pages function. | |
Author: Katsutoshi Seki | |
Website: https://github.com/sekika | |
Script URL: https://gist.github.com/sekika/1e8811868cebeca6c3443c69849929db | |
Japanese explanation: https://sekika.github.io/2023/08/09/concatepdf/ | |
License: MIT license | |
""" | |
# Help text passed to the command-line argument parser in main().
DESCRIPTION = (
    'Arrange multiple PDF images both horizontally and vertically '
    'within a single page, resulting in multiple pages'
)
# Threshold on the number of input files: concat_pdf_pages() hands longer
# input lists (that also exceed one page worth of images) to
# use_temp_files(), which processes them in batches.
MAX_OPEN_FILE = 100
def main():
    """Command-line entry point.

    Parses the input files, the output file and the grid size
    (columns x rows) from the command line and passes them to
    concat_pdf_pages. A column or row count smaller than 1 is
    rejected with a usage error instead of failing later with an
    unrelated exception.
    """
    import argparse

    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument("-i", "--input", type=str, nargs="+",
                        required=True, help="Input files")
    parser.add_argument("-o", "--output", type=str,
                        required=True, help="Output file")
    parser.add_argument("-c", "--columns", type=int, default=1,
                        help="Numbers of columns in a page")
    parser.add_argument("-r", "--rows", type=int, default=1,
                        help="Numbers of rows in a page")
    args = parser.parse_args()
    # A zero or negative grid size cannot be laid out; parser.error
    # prints the usage message and exits with status 2.
    if args.columns < 1 or args.rows < 1:
        parser.error("--columns and --rows must be 1 or greater")
    concat_pdf_pages(args.input, args.output, args.columns, args.rows)
def concat_pdf_pages(input_files, output_file, col, row):
    """Arrange multiple PDF images both horizontally and vertically within a single page, resulting in multiple pages

    Only the first page of every input file is used. Files fill each
    output page row by row: `col` consecutive files are joined
    horizontally into one row, and the rows are then stacked
    vertically. A last page that is not completely filled is still
    written.

    Input
        input_files: File name of input files
        output_file: File name of output file
        col: Columns in a page
        row: Rows in a page
    Return
        nothing
    Raise
        ValueError: if col or row is smaller than 1
    """
    from contextlib import ExitStack

    if col < 1 or row < 1:
        raise ValueError(
            "col and row must be 1 or greater (got col={0}, row={1})".format(
                col, row))

    from PyPDF3 import PdfFileWriter, PdfFileReader

    per_page = col * row
    if len(input_files) > max(MAX_OPEN_FILE, per_page):
        use_temp_files(input_files, output_file, col, row)
        return

    # The page objects stay tied to their open input streams until the
    # writer has produced the output, so every input file is kept open
    # through the write and then closed by the ExitStack.
    with ExitStack() as stack:
        output = PdfFileWriter()
        for start in range(0, len(input_files), per_page):
            cells = [
                PdfFileReader(stack.enter_context(open(name, "rb")),
                              strict=False).getPage(0)
                for name in input_files[start:start + per_page]
            ]
            rows = [
                concat_pdf(cells[k:k + col], horizontal=True)
                for k in range(0, len(cells), col)
            ]
            output.addPage(concat_pdf(rows))
        with open(output_file, "wb") as out_stream:
            output.write(out_stream)
def use_temp_files(input_files, output_file, col, row):
    """Same as concat_pdf_pages but using temporary files
    to avoid error for opening too many files.

    When there are more input files than MAX_OPEN_FILE, this function
    is called. The input list is cut into batches holding a whole
    number of output pages, each batch is written to a temporary PDF
    next to the output file by concat_pdf_pages, and the temporary
    PDFs are merged into the output file. Temporary files are removed
    afterwards, also when an error interrupts the process.

    Input
        input_files: File name of input files
        output_file: File name of output file
        col: Columns in a page
        row: Rows in a page
    Return
        nothing
    """
    import os
    import PyPDF3

    total_files = len(input_files)
    pdf_in_page = col * row
    # As many whole pages as fit within MAX_OPEN_FILE, but at least one
    # page, so that a page is never split across two temporary files.
    pdf_in_file = max((MAX_OPEN_FILE // pdf_in_page), 1) * pdf_in_page
    num_files = (total_files - 1) // pdf_in_file + 1
    print('As numbers of pdf ({0}) exceeds {1}, temporary files tmp_**_{2} are created.'.format(
        total_files, MAX_OPEN_FILE, output_file))
    # Put the "tmp_N_" prefix on the file name only; prefixing the whole
    # path would point into a non-existent directory for outputs such as
    # "output/output.pdf".
    out_dir, out_name = os.path.split(output_file)
    tmp_files = []
    try:
        for i in range(num_files):
            tmp_file = os.path.join(out_dir, 'tmp_' + str(i) + '_' + out_name)
            tmp_files.append(tmp_file)
            print('Creating ' + tmp_file + '\r', end='')
            begin = pdf_in_file * i
            end = min(total_files, pdf_in_file * (i + 1))
            concat_pdf_pages(input_files[begin:end], tmp_file, col, row)
        print('Creating ' + output_file + ' \r', end='')
        merger = PyPDF3.PdfFileMerger()
        try:
            for file in tmp_files:
                merger.append(file)
            merger.write(output_file)
        finally:
            # Close the merger before deleting the files it has read.
            merger.close()
    finally:
        for file in tmp_files:
            # A failure may have happened before this file was created.
            if os.path.exists(file):
                os.remove(file)
    print('{0} was created and temporary files were removed.'.format(output_file))
def concat_pdf(pages, horizontal=False):
    """Join PDF pages side by side or on top of each other

    The upper right corner of each page's media box is taken as its
    width and height. Horizontally, pages are placed left to right
    starting at the bottom left corner; vertically, the first page is
    placed at the top and the following pages below it.

    Input
        pages: List of PDF objects of PyPDF3
        horizontal: Align horizontally if set true, vertically otherwise
    Return
        PDF object of PyPDF3
    """
    from PyPDF3.pdf import PageObject

    def width_of(pdf_page):
        return pdf_page.mediaBox.upperRight[0]

    def height_of(pdf_page):
        return pdf_page.mediaBox.upperRight[1]

    # Size of the canvas: extents add up along the joining axis and the
    # largest extent is used across it.
    if horizontal:
        canvas_width = sum(width_of(p) for p in pages)
        canvas_height = max(height_of(p) for p in pages)
    else:
        canvas_width = max(width_of(p) for p in pages)
        canvas_height = sum(height_of(p) for p in pages)
    canvas = PageObject.createBlankPage(None, canvas_width, canvas_height)

    head = pages[0]
    tail = pages[1:]
    if horizontal:
        # First page sits at the origin; x then advances by each width.
        canvas.mergePage(head)
        x = width_of(head)
        for p in tail:
            canvas.mergeTranslatedPage(p, x, 0)
            x += width_of(p)
    else:
        # Start at the top of the canvas and move down by each height.
        y = canvas_height - height_of(head)
        canvas.mergeTranslatedPage(head, 0, y)
        for p in tail:
            y -= height_of(p)
            canvas.mergeTranslatedPage(p, 0, y)
    return canvas
# Run the command-line interface only when this file is executed as a
# script, so that importing it (e.g. to call concat_pdf_pages) has no
# side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
日本語の説明
https://sekika.github.io/2023/08/09/concatepdf/