Last active
September 17, 2020 12:23
-
-
Save yogendratamang48/eba436cdcd5968cac2d98a6bd6f4713e to your computer and use it in GitHub Desktop.
pdf merger script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Merges PDF files given explicitly or found in a given directory.

### Install
`pip install pypdf2`

### Merging files
- Provide the files with the `-f` or `--files` flag:
>`python3 merger.py --files 1_OriginalAndFirstExt.pdf 2_OriginalAndFirstExt.pdf`

### Merging files from a directory
- Provide the input directory that contains the PDFs with `-i` or `--input_dir`:
>`python3 merger.py --input_dir inputs --output apple.pdf`

### Output file
- Provide the output file with the `-o` or `--output_file` flag.
  If it is omitted, the result is written to `merged/merged.pdf`.
""" | |
import os | |
from PyPDF2 import PdfFileMerger | |
import logging | |
import argparse | |
# Root logger; its level and stream handler are configured inside main().
logger = logging.getLogger()

# Default destination used when no output file is given on the command line.
OUTPUT_DIR = 'merged'
OUTPUT_FILE = OUTPUT_DIR + '/merged.pdf'

# Default input directory name (not referenced by the code visible in this file).
INPUT_DIR = 'inputs'
def _find_pdfs(input_dir):
    """Return paths of the PDF files directly inside *input_dir*, sorted by name."""
    # Sorting makes the page order of the merged document deterministic;
    # os.listdir() returns entries in arbitrary order.
    return [
        os.path.join(input_dir, name)
        for name in sorted(os.listdir(input_dir))
        if name.lower().endswith('.pdf')
        and os.path.isfile(os.path.join(input_dir, name))
    ]


def main():
    """Parse the command line and merge the requested PDF files into one.

    Command-line arguments:
        -f / --files        One or more PDF paths, merged in the order given.
        -i / --input_dir    Directory whose ``*.pdf`` files are merged in
                            name order. Ignored when ``--files`` is given.
        -o / --output_file  Destination path. Defaults to ``OUTPUT_FILE``.

    The parent directory of the destination is created when missing. If
    neither ``--files`` nor ``--input_dir`` is supplied, or the input
    directory contains no PDF files, an error is logged and nothing is
    written.

    Raises:
        OSError: if the input directory cannot be listed or the destination
            cannot be created or written.
    """
    # Log everything at INFO and above to the console.
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input_dir",
        type=str,
        help="Directory whose pdf are to be merged"
    )
    parser.add_argument(
        "-f", "--files",
        nargs='+',
        help="provide list of files with -f flag"
    )
    parser.add_argument(
        "-o", "--output_file",
        type=str,
        help="output file name"
    )
    args = parser.parse_args()
    input_dir = args.input_dir
    input_files = args.files
    destination = args.output_file or OUTPUT_FILE

    if not (input_dir or input_files):
        logger.error("ERR: --input or --files arguments")
        return

    if input_files:
        logger.info("Merging files: %s", input_files)
        sources = input_files
    else:
        # Drop trailing slashes only. Stripping both ends would turn an
        # absolute path such as "/data/pdfs/" into the relative "data/pdfs".
        input_dir = input_dir.rstrip('/') or input_dir
        logger.info("Merging files from %s directory", input_dir)
        sources = _find_pdfs(input_dir)
        if not sources:
            logger.error("No pdf files found in %s directory", input_dir)
            return

    # Create the directory of the actual destination, not just the default
    # one, so a custom output path in a new directory also works.
    out_parent = os.path.dirname(destination)
    if out_parent:
        os.makedirs(out_parent, exist_ok=True)

    merger = PdfFileMerger(strict=False)
    try:
        for path in sources:
            merger.append(path)
        # An existing destination is overwritten by the write itself, so
        # it is not deleted up front.
        merger.write(destination)
    finally:
        # Release the input file handles even when appending/writing fails.
        merger.close()
    logger.info("Merged into: %s", destination)
# Run the merger only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment