Skip to content

Instantly share code, notes, and snippets.

@spthm
Forked from agarciadom/find-color-pages
Last active January 24, 2018 21:10
Show Gist options
  • Save spthm/9c2f791e290904429a9496e894cbb6d1 to your computer and use it in GitHub Desktop.
Save spthm/9c2f791e290904429a9496e894cbb6d1 to your computer and use it in GitHub Desktop.
Small Python 2.7+ script that lists the color pages in a PDF along with their CMYK ink mixes, as computed by the 'inkcov' device in Ghostscript 9.05+. "find-color-pages file.pdf" lists the color pages and their CMYK ink mixes, "find-color-pages -c file.pdf" prints the number of color pages in the PDF and "find-color-pages -C 0.39 -B 0.04 file.pdf…
#!/usr/bin/env python
# Simple script for finding and counting the color pages in a PDF
# Copyright (C) 2013 Antonio Garcia-Dominguez
# Licensed under the GPLv3
#
# This script is based on the following thread (thanks for the tip!):
#
# http://tex.stackexchange.com/questions/53493
from __future__ import print_function
import logging
import re
import subprocess
from os import path, access, R_OK
# Version string of this script.
VERSION = "1.0.4"

# Matches one whole ink-coverage token such as "0.02230" or "1.00000".
# The dot is escaped and the pattern is anchored at both ends, so tokens
# like "0x123" or "0.5abc" are rejected instead of reaching float().
RE_FLOAT = re.compile(r"^[01]\.[0-9]+$")

# Number of leading fields (C, M, Y, K) read from each coverage line.
CMYK_NCOLORS = 4

# Only errors are shown by default.
logging.basicConfig(level=logging.ERROR)
def is_color(c, m, y, k):
    """Tell whether a page's ink coverage includes any non-black ink.

    A page counts as color when the cyan, magenta or yellow value is
    above zero. The k value is accepted but not consulted.
    """
    return any(ink > 0 for ink in (c, m, y))
def cmyk_per_page(pdf_file):
    """Yield one (c, m, y, k) tuple of floats per coverage line from gs.

    Runs ``gs -o - -sDEVICE=inkcov`` on *pdf_file* and scans its standard
    output line by line. A line is reported when its first CMYK_NCOLORS
    whitespace-separated fields all match RE_FLOAT and convert to float;
    every other line is skipped.

    This is a generator, so the checks below run (and may raise) when
    iteration starts, not when the function is called.

    Raises:
        Exception: if *pdf_file* is not an existing regular file, or is
            not readable.
    """
    if not path.isfile(pdf_file):
        raise Exception("{} does not exist or is not a file".format(pdf_file))
    if not access(pdf_file, R_OK):
        raise Exception("{} is not readable".format(pdf_file))

    gs_inkcov = subprocess.Popen(
        ["gs", "-o", "-", "-sDEVICE=inkcov", pdf_file],
        stdout=subprocess.PIPE)
    try:
        for raw_line in iter(gs_inkcov.stdout.readline, b''):
            # Undecodable bytes in gs output are replaced rather than
            # aborting the scan; they cannot be part of a coverage value.
            line = raw_line.decode('utf8', 'replace').rstrip()
            logging.debug("Read line %s", line)

            fields = line.split()[:CMYK_NCOLORS]
            if len(fields) < CMYK_NCOLORS:
                continue
            if not all(RE_FLOAT.match(field) for field in fields):
                continue

            try:
                cmyk = tuple(float(value) for value in fields)
            except ValueError:
                # match() only checks a prefix, so a token may pass the
                # pattern and still not be a valid float.
                logging.debug("Skipping non-numeric fields %s", fields)
                continue

            logging.debug("Extracted fields %s", cmyk)
            yield cmyk
    finally:
        # Runs on exhaustion, on error, and when the generator is closed
        # early: release the pipe and reap the child process.
        gs_inkcov.stdout.close()
        gs_inkcov.wait()
def count_page_types(pdf_file):
    """Count the pages of *pdf_file* by kind.

    Every tuple produced by cmyk_per_page() is classified with is_color().

    Returns:
        A (non_color_count, color_count) tuple.
    """
    verdicts = [is_color(*coverage) for coverage in cmyk_per_page(pdf_file)]
    color_total = sum(1 for verdict in verdicts if verdict)
    return (len(verdicts) - color_total, color_total)
def find_color_pages(pdf_file):
    """Yield (page_number, cmyk) for every page that is_color() accepts.

    Page numbers start at 1 and follow the order in which
    cmyk_per_page() produces its tuples.
    """
    page_number = 0
    for coverage in cmyk_per_page(pdf_file):
        page_number += 1
        if not is_color(*coverage):
            continue
        logging.debug("Page %d is a color page", page_number)
        yield (page_number, coverage)
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run exactly one of
    # the output modes (count, CSV list, cost report, or the default table).
    import argparse

    parser = argparse.ArgumentParser(description="""Lists or counts the
    colour pages of a PDF file on standard output. The utility
    requires having the 'gs' tool from Ghostscript 9.05 or later
    installed and available through the PATH.""")
    # ArgumentParser(version=...) is rejected by Python 3, so the version
    # flag is declared explicitly.
    parser.add_argument("--version", "-v", action="version", version=VERSION)
    parser.add_argument("file", help="PDF file to be analyzed")
    parser.add_argument("--count", "-c", action='store_true',
                        help="Print the number of pages instead of listing them")
    parser.add_argument("--csv", "-s", action='store_true',
                        help="Output color pages as comma-separated list")
    parser.add_argument("--debug", "-d", action='store_true',
                        help="Enables verbose debugging output")
    parser.add_argument("--noheader", "-H", action='store_true',
                        help="Disables the first header line")
    parser.add_argument("--pcolor", "-C", metavar="PC", type=float,
                        help="Color page price (for total cost report, " +
                        "requires --pblack as well)")
    parser.add_argument("--pblack", "-B", metavar="PB", type=float,
                        help="B/W page price (for total cost report, " +
                        "requires --pcolor as well)")
    args = parser.parse_args()

    if args.debug:
        logging.getLogger('').setLevel(logging.DEBUG)

    # The two prices only make sense together. parser.error() prints the
    # usage line plus the message and exits with status 2.
    if args.pcolor is not None and args.pblack is None:
        parser.error(
            "Page price was specified for color but not for B/W pages")
    if args.pblack is not None and args.pcolor is None:
        parser.error(
            "Page price was specified for B/W but not for color pages")

    if args.count:
        # Index 1 of the result is the number of color pages.
        print(count_page_types(args.file)[1])
    elif args.csv:
        # join() places separators only between page numbers, so the line
        # does not end in a dangling ", ".
        print(", ".join(str(n) for n, _cmyk in find_color_pages(args.file)))
    elif args.pcolor is not None and args.pblack is not None:
        nb, nc = count_page_types(args.file)
        total_cost = args.pblack * nb + args.pcolor * nc
        print(("Total cost ({0:d} B/W @ {1:3.6g}/page "
               + "and {2:d} color @ {3:3.6g}/page): {4:3.6g}")
              .format(nb, args.pblack, nc, args.pcolor, total_cost))
    else:
        # Default mode: tab-separated table of color pages and their inks.
        if not args.noheader:
            print("\t".join(("n", "c", "m", "y", "k")))
        for n, cmyk in find_color_pages(args.file):
            print("\t".join(str(s) for s in (n,) + cmyk))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment