Skip to content

Instantly share code, notes, and snippets.

@carolinux
Created May 4, 2020 14:16
Show Gist options
  • Save carolinux/90c7be289d4b0a584e97d1034b04b148 to your computer and use it in GitHub Desktop.
Save carolinux/90c7be289d4b0a584e97d1034b04b148 to your computer and use it in GitHub Desktop.
# Requires pandoc (to support txt files), img2pdf (to support images) and pdfjoin (to join the pdfs).
# Stitches together the txt, image and pdf files in a folder into a single output pdf.
# Filenames must begin with their desired sequence number followed by an underscore,
# e.g. 1_foo.pdf, 2_beach.png, 4_ending_notes.txt.
# Gaps in the sequence are allowed.
import glob
import os
import subprocess
import sys
# Extensions (lower-case) that are converted to pdf with img2pdf.
IMAGE_EXTENSIONS = (".png", ".jpeg", ".jpg")


def _sequence_number(filename):
    """Return the integer prefix of a name like ``12_foo.jpg``, or None.

    None is returned when the name contains no underscore or when the part
    before the first underscore is not an integer.
    """
    parts = filename.split("_")
    if len(parts) < 2:
        return None
    try:
        return int(parts[0])
    except ValueError:
        return None


def _convert(tool, source, target_pdf):
    """Run ``tool source -o target_pdf`` and return True if it exited with 0.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled safely.
    """
    try:
        return subprocess.call([tool, source, "-o", target_pdf]) == 0
    except OSError as err:  # e.g. the tool is not installed
        print("could not run {}: {}".format(tool, err))
        return False


def collect_pdfs(folder, out_path=None):
    """Return the pdf paths to join from *folder*, ordered by sequence number.

    Images and txt files are converted to a pdf next to the source file.
    Files without a numeric ``N_`` prefix, files with an unsupported
    extension, and *out_path* itself (if given) are skipped. Files sharing a
    sequence number are ordered by path so the result is deterministic.
    """
    skip = os.path.abspath(out_path) if out_path is not None else None
    numbered = []
    # glob.glob returns a list, so pdfs created during the loop are not revisited.
    for ffn in glob.glob(os.path.join(folder, "*")):
        if skip is not None and os.path.abspath(ffn) == skip:
            # Never feed a previous run's output back into the join.
            print(ffn + " skipped")
            continue
        num = _sequence_number(os.path.basename(ffn))
        if num is None:
            print(ffn + " skipped")
            continue
        ffn_without_ext, ext = os.path.splitext(ffn)
        ext = ext.lower()  # accept .PNG, .JPG, .PDF, ...
        if ext == ".pdf":
            numbered.append((num, ffn))
            continue
        if ext in IMAGE_EXTENSIONS:
            tool = "img2pdf"
        elif ext == ".txt":
            tool = "pandoc"
        else:
            print(ffn + " skipped")
            continue
        target_pdf = ffn_without_ext + ".pdf"
        if os.path.exists(target_pdf):
            # The existing pdf is matched by the glob on its own, so adding it
            # here as well would duplicate it in the output.
            print(ffn + " already converted")
            continue
        if _convert(tool, ffn, target_pdf):
            numbered.append((num, target_pdf))
        else:
            print(ffn + " could not be converted, skipped")
    numbered.sort()
    return [path for _, path in numbered]


def main(argv=None):
    """Stitch the numbered files of a folder into ``<folder>/<folder>_<out>``.

    *argv* is ``[folder, out_pdf]``; both are optional and default to ``'.'``
    and ``'out.pdf'``. When *argv* is None, ``sys.argv[1:]`` is used.
    Returns the exit status of pdfjoin, or 1 if there was nothing to join or
    pdfjoin could not be started.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    folder = args[0] if len(args) > 0 else '.'
    out_pdf = args[1] if len(args) > 1 else 'out.pdf'
    # abspath resolves '.' and trailing slashes to the real directory name,
    # so the output is e.g. foo/foo_out.pdf rather than ./._out.pdf.
    base_folder = os.path.basename(os.path.abspath(folder))
    out_path = os.path.join(folder, base_folder + '_' + out_pdf)

    pdfs_to_stitch = collect_pdfs(folder, out_path)
    if not pdfs_to_stitch:
        print("nothing to stitch in " + folder)
        return 1

    cmd = ["pdfjoin"] + pdfs_to_stitch + ["--rotateoversize", "false", "--outfile", out_path]
    print("Running cmd: {}".format(" ".join(cmd)))
    try:
        return subprocess.call(cmd)
    except OSError as err:  # e.g. pdfjoin is not installed
        print("could not run pdfjoin: {}".format(err))
        return 1


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment