Created
December 26, 2011 16:07
-
-
Save ssokolow/1521494 to your computer and use it in GitHub Desktop.
Simple script for converting a set of PNGs into a PDF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
"""A simple tool for converting batches of PNG pages into a PDF file. | |
Usage: | |
1. Drop into the folder with the PNGs. | |
2. chmod +x png2pdf.py | |
3. Double-click it. | |
Requires: | |
- PyPDF 1.13+ | |
- ImageMagick (in the PATH) | |
""" | |
__appname__ = "png2pdf" | |
__author__ = "Stephan Sokolow (deitarion/SSokolow)" | |
__version__ = "0.3" | |
__license__ = "GNU GPL 3.0 or later" | |
import logging | |
log = logging.getLogger(__name__) | |
import os, pyPdf, subprocess, sys | |
class Empty(object):
    """Bare attribute container; instances accept arbitrary attributes."""
# Minimal stand-in "job" object: a terminal sink plus a status callback,
# both wired straight to stderr.
job = Empty()
job.terminal = Empty()
job.terminal.feed = sys.stderr.write


def _feed_status(x):
    """Send ``x`` to the job's terminal sink, framed by CR/LF pairs."""
    return job.terminal.feed('\r\n%s\r\n' % x)


job.feed_status = _feed_status
def magick_pngs_to_pdfs(job, data):
    """Convert each PNG in ``data`` into its own PDF using ImageMagick.

    The PDFs are written to a ``_build`` directory next to the first input
    file.  After conversion each PDF is stamped with its source PNG's
    modification time, and on later runs a PDF whose mtime still matches
    its PNG is reused instead of being regenerated.

    :param job: Accepted but not used by this function.
    :param data: Sequence of paths to the PNG files to convert.
    :returns: List of PDF paths, one per input and in the same order.
        An empty ``data`` yields an empty list.
    :raises OSError: if an input cannot be stat-ed, or ``_build`` cannot be
        created (including when that name exists but is not a directory).
    :raises subprocess.CalledProcessError: if ``convert`` exits non-zero.
    """
    if not data:
        # Nothing to convert; also avoids indexing an empty sequence below.
        return []

    # TODO: Decide how to handle removing PDFs for which the PNGs have been
    #       deleted.
    # NOTE: all outputs go beside the *first* input, regardless of where the
    # remaining inputs live.
    outdir = os.path.join(os.path.dirname(data[0]), '_build')

    # isdir() rather than exists(): if a non-directory is in the way,
    # makedirs() fails right here instead of every conversion failing later.
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    outpaths = []
    for path in data:
        stem = os.path.splitext(os.path.basename(path))[0]
        outpath = os.path.join(outdir, stem + '.pdf')

        in_stat = os.stat(path)
        try:
            out_stat = os.stat(outpath)
        except OSError:
            out_stat = None  # PDF has not been generated yet

        # Timestamps written with utime() and read back with stat() may not
        # round-trip exactly as floats, so compare with a small tolerance
        # rather than with ==.
        up_to_date = (out_stat is not None and
                      abs(in_stat.st_mtime - out_stat.st_mtime) < 0.001)

        if up_to_date:
            log.info("Generated PDF already up to date: %s", outpath)
        else:
            subprocess.check_call(['convert', '-monitor', path, outpath])
            # Keep the previous PDF's access time when there was one, and
            # copy the PNG's mtime so the check above succeeds next run.
            if out_stat is not None:
                atime = out_stat.st_atime
            else:
                atime = in_stat.st_atime
            os.utime(outpath, (atime, in_stat.st_mtime))

        outpaths.append(outpath)
    return outpaths
def reduce_pdfs_to_pdf(job, data, outpath):
    """Bundle many PDFs into one without altering page dimensions.

    Every page of every input must have the same media box as the first
    page encountered.  If any page differs, nothing is written and the
    mismatching pages are listed in the raised error.

    Resources in case this approach fails:
    - https://www.linux.com/news/software/applications/8229-putting-together-pdf-files
    - http://milan.kupcevic.net/ghostscript-ps-pdf/
    - http://www.ghostscript.com/doc/current/Devices.htm

    :param job: Object providing ``feed_status(message)`` and
        ``terminal.feed(text)``, both used for progress output.
    :param data: Sequence of paths to the input PDFs, in output order.
    :param outpath: Path of the combined PDF to write.
    :raises AssertionError: if any page's media box differs from that of the
        first page.  Page and piece numbers in messages are 1-based.
    """
    dimensions, failed = None, []
    pieces, pdf_out, handles = len(data), pyPdf.PdfFileWriter(), []

    try:
        for idx, path in enumerate(data, 1):
            #TODO: Probably a good idea to have another wrapper for status
            #      outputs which self-overwrite using \r
            job.feed_status(
                "Queueing operations for final PDF: Piece %d of %d"
                % (idx, pieces))

            # The inputs stay open until the output has been written; they
            # are all closed together in the ``finally`` clause.
            file_in = open(path, 'rb')
            handles.append(file_in)
            pdf_in = pyPdf.PdfFileReader(file_in)

            for pidx, page in enumerate(pdf_in.pages, 1):
                # Explicit comparison instead of ``assert`` so the size
                # check still runs when Python is started with -O.
                if not dimensions:
                    dimensions = page.mediaBox
                elif not (page.mediaBox == dimensions):
                    failed.append((pidx, path))
                    continue
                #TODO: Need to make the task progress bar definite.
                pdf_out.addPage(page)

        if failed:
            raise AssertionError(
                "Page sizes don't match first page:\n\t%s" %
                '\n\t'.join(['page %d of file %s' % x for x in failed]))

        with open(outpath, 'wb') as file_out:
            job.terminal.feed(
                "\r\nBuilding %s (%d pages from %d source files)\r\n" %
                (outpath, pdf_out.getNumPages(), pieces))
            pdf_out.write(file_out)
    finally:
        for fh in handles:
            fh.close()
def bundle_pngs(args, outpath):
    """Turn the PNGs listed in ``args`` into one PDF at ``outpath``.

    Runs the per-image conversion, then the merge step, both with the
    module-level ``job`` object.  Returns ``outpath`` unchanged.
    """
    reduce_pdfs_to_pdf(job, magick_pngs_to_pdfs(job, args), outpath)
    return outpath
if __name__ == '__main__':
    # Command-line entry point: parse options, configure logging, choose the
    # input PNGs and bundle them into book.pdf in the current directory.
    from optparse import OptionParser
    parser = OptionParser(version="%%prog v%s" % __version__,
        usage="%prog [options] <argument> ...",
        description=__doc__.replace('\r\n','\n').split('\n--snip--\n')[0])
    parser.add_option('-v', '--verbose', action="count", dest="verbose",
        default=3, help="Increase the verbosity.")
    parser.add_option('-q', '--quiet', action="count", dest="quiet",
        default=0, help="Decrease the verbosity. Can be used thrice for extra effect.")
    parser.add_option('--go', action="store_true", dest="go",
        default=False, help="Used for two-stage startup with no arguments")

    opts, args = parser.parse_args()

    # Set up clean logging to stderr
    # The default verbosity of 3 selects INFO; each -v moves one step toward
    # DEBUG and each -q one step toward CRITICAL, clamped to the list bounds.
    log_levels = [logging.CRITICAL, logging.ERROR, logging.WARNING,
                  logging.INFO, logging.DEBUG]
    opts.verbose = min(opts.verbose - opts.quiet, len(log_levels) - 1)
    opts.verbose = max(opts.verbose, 0)
    logging.basicConfig(level=log_levels[opts.verbose],
                        format='%(levelname)s: %(message)s')

    if not args:
        if not opts.go:
            # Two-stage startup: with no arguments and no --go, re-run this
            # script with --go inside an xterm, from the script's own folder.
            # Resolve the script path BEFORE changing directory so that a
            # relative invocation path still points at the script afterwards.
            script = os.path.abspath(sys.argv[0])
            os.chdir(os.path.dirname(script))
            os.execlp('xterm', 'xterm', '-hold', '-e', script, '--go')

        log.info("No files specified. Using all PNGs in the current directory")
        args = [x for x in os.listdir('.') if x.lower().endswith('.png')]
        args.sort()
        #TODO: Use a more natural sorting algorithm.

    if args:
        #TODO: Add a -d/--directories option so I can easily batch-convert multiple books.
        bundle_pngs(args, 'book.pdf')
        # Call form of print behaves the same on Python 2 and Python 3 for a
        # single string argument.
        print("Done.")
    else:
        print("No files found.")
faraonchis
commented
Aug 31, 2018
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment