Skip to content

Instantly share code, notes, and snippets.

@mara004
Last active July 11, 2024 20:43
Show Gist options
  • Save mara004/8ef3a803531fdd42b29bbfa2889ff7f3 to your computer and use it in GitHub Desktop.
Save mara004/8ef3a803531fdd42b29bbfa2889ff7f3 to your computer and use it in GitHub Desktop.
PDF rendering with Ghostscript (via subprocess)
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-FileCopyrightText: 2024 James R. Barlow <james@purplerock.ca>
# SPDX-License-Identifier: MPL-2.0
# Initial code derived from ocrmypdf/_exec/ghostscript.py
# Note that Ghostscript is AGPL-licensed. However, we are calling it via subprocess here, so not sure whether copyleft would apply.
# See also https://www.gnu.org/licenses/gpl-faq.en.html#MereAggregation
import io
import os
import re
import sys
import shutil
import logging
import subprocess
import PIL.Image
logger = logging.getLogger(__name__)
def get_ghostscript():
    """
    Locate the Ghostscript console executable via shutil.which().

    On Windows, 'gswin64c' is looked up first and 'gswin32c' is used as a
    fallback; on every other platform 'gs' is looked up.
    Returns the result of shutil.which(), i.e. None if nothing was found.
    """
    # TODO consider searching the windows registry, as ocrmypdf does
    # https://github.com/jbarlow83/OCRmyPDF/blob/master/src/ocrmypdf/subprocess/_windows.py
    if not sys.platform.startswith('win32'):
        return shutil.which('gs')
    found = shutil.which('gswin64c')
    return found if found else shutil.which('gswin32c')
def _gs_error_reported(stream):
    """Return True if the text *stream* contains 'error' in any letter case, else False."""
    return re.search('error', stream, flags=re.IGNORECASE) is not None
def _gs_rasterise_pdf(
    input_file,
    *,
    pageno,
    raster_dpi,
    password = None,
    raster_device = 'png16m',
):
    """
    Rasterize one page of a PDF at resolution *raster_dpi*.
    *pageno* is the visual (1-based) page number.
    Note that Ghostscript takes /UserUnit into account on its own.

    *input_file* may be anything accepted by os.fspath().
    *raster_dpi* is rounded to 6 decimal places and applied to both axes.
    *password*, if not None, is handed to Ghostscript via -sPDFPassword.
    *raster_device* is the Ghostscript output device name.

    Returns the image opened by PIL.Image.open() from the bytes Ghostscript
    wrote to stdout.
    Raises RuntimeError if the Ghostscript executable cannot be found, or if
    the Ghostscript process exits with a non-zero status (the original
    subprocess.CalledProcessError is attached as the cause).
    """
    raster_dpi = round(raster_dpi, 6)
    gs = get_ghostscript()
    if not gs:
        raise RuntimeError("Ghostscript could not be found. Make sure it is installed and added to $PATH.")

    args_gs = [
        gs,
        '-dQUIET',
        '-dSAFER',
        '-dBATCH',
        '-dNOPAUSE',
        '-dNOPROMPT',
        f'-sDEVICE={raster_device}',
        # Same first and last page, so exactly one page is requested.
        f'-dFirstPage={pageno}',
        f'-dLastPage={pageno}',
        f'-r{raster_dpi:f}x{raster_dpi:f}',
        '-dTextAlphaBits=4',
        '-dGraphicsAlphaBits=4',
        '-dInterpolateControl=-1',
    ]
    if password is not None:
        # NOTE(review): the password becomes part of the command line, which
        # may be visible to other local processes -- confirm this is acceptable.
        args_gs.append(f'-sPDFPassword={password}')
    args_gs.extend(
        [
            # Output file '-' means stdout; it is captured below and decoded as the image.
            '-o',
            '-',
            # Keep Ghostscript's own stdout messages out of the image stream.
            '-sstdout=%stderr',
            '-dAutoRotatePages=/None',
            '-f',
            os.fspath(input_file),
        ]
    )
    #logger.debug(args_gs)

    try:
        pipe = subprocess.run(
            args_gs,
            stdout = subprocess.PIPE,
            stderr = subprocess.PIPE,
            check = True,
        )
    except subprocess.CalledProcessError as exc:
        logger.error(exc.stderr.decode(errors='replace'))
        # Chain explicitly so the traceback shows the subprocess failure as the cause.
        raise RuntimeError('Ghostscript rasterizing failed') from exc

    stderr = pipe.stderr.decode(errors='replace')
    if not pipe.stdout:
        # Exit status was 0 but there is nothing to decode; opening the image
        # below is expected to fail, so surface Ghostscript's messages first.
        logger.error("Ghostscript wrote no image data to stdout: %s", stderr)
    elif _gs_error_reported(stderr):
        logger.error(stderr)

    return PIL.Image.open( io.BytesIO(pipe.stdout) )
def invoke_ghostscript_shell(filepath, index, scale=4, password=None):
    """
    Render one page of the PDF at *filepath* by delegating to _gs_rasterise_pdf().

    *index* is converted to a page number with index + 1, and *scale* is
    converted to a resolution with scale * 72. *password* is forwarded as is.
    Returns whatever _gs_rasterise_pdf() returns.
    """
    # Note, this does not support rotation.
    render_options = dict(
        pageno = index + 1,
        raster_dpi = scale * 72,
        password = password,
    )
    return _gs_rasterise_pdf(filepath, **render_options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment