Skip to content

Instantly share code, notes, and snippets.

@jbidoret
Created August 11, 2022 19:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jbidoret/c179d6fb84cb02e1290579107965adfb to your computer and use it in GitHub Desktop.
Save jbidoret/c179d6fb84cb02e1290579107965adfb to your computer and use it in GitHub Desktop.
Remove (white) background from pdfs generated by browsers
#!/usr/bin/python3
# https://tex.stackexchange.com/questions/50069/how-to-change-white-background-of-an-included-pdf-to-transparent
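# Usage:
#   python3 removebg.py filewithbackground.pdf
# The input file is overwritten in place.
# The script is not smart at all: it shrinks to 0 × 0 every rectangle whose size
# matches the computed “full page” background (page size plus bleed, with the
# height multiplied by the number of pages).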
import os
import sys
import math
import subprocess
# Set to True to be prompted for the page geometry instead of using `preset`.
interactive = False

# Geometry used when `interactive` is False: A3, no bleed, two pages.
# Lengths are in centimeters (interactive answers in millimeters are divided by 10).
preset = dict(
    width=29.7,
    height=42.0,
    bleed=0.0,
    pages=2,
)
def gather_info():
    """Return the page geometry as a ``(width, height, bleed, pages)`` tuple.

    When the module-level ``interactive`` flag is false, the values come from
    the module-level ``preset`` dict. Otherwise the user is prompted on stdin;
    lengths are entered in millimeters and converted to centimeters, which is
    the unit ``preset`` uses.

    Raises:
        KeyError: if ``preset`` lacks one of the four expected keys.
        ValueError: if an interactive answer is not a valid number.
    """
    if not interactive:
        # Look the keys up explicitly rather than relying on dict ordering, and
        # return a real tuple so both branches have the same return type.
        return (
            preset["width"],
            preset["height"],
            preset["bleed"],
            preset["pages"],
        )
    # float() rather than int() so fractional millimeters (e.g. a 2.5 mm bleed)
    # are accepted; whole numbers give the same result as before.
    width = float(input("Page width? (millimeters)")) / 10
    height = float(input("Page height? (millimeters)")) / 10
    bleed = float(input("Any bleed? (millimeters)")) / 10
    pages = int(input("Number of pages? "))
    return (width, height, bleed, pages)
def _white_rect_size(width, height, bleed, pages):
    """Return the ``(w, h)`` size of the background rectangle to look for.

    ``width``, ``height`` and ``bleed`` are in centimeters; they are converted
    with a factor of 96 / 2.54 and rounded up. The height is multiplied by the
    number of pages.
    """
    w = math.ceil((96/2.54) * (width + bleed))
    h = math.ceil((96/2.54) * (height + bleed)) * pages
    return w, h


def _remove_quietly(path):
    """Delete ``path`` if it exists; a missing file is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def main(argv=None):
    """Shrink the full-page background rectangle of a PDF to 0 × 0, in place.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the PDF to process.
    The file is expanded to QDF form with ``qpdf``, every occurrence of the
    text ``"<w> <h>"`` is replaced by ``"0 0"``, and the result is repaired
    with ``fix-qdf`` before it replaces the original file.

    Exits with an error message if the argument is missing, the file does not
    exist, or one of the external tools fails. The original file is only
    overwritten once ``fix-qdf`` has succeeded.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: python3 removebg.py filewithbackground.pdf")

    fn = argv[1]
    if not os.path.exists(fn):
        sys.exit("File not found: {}".format(fn))

    # file and temp files
    bn = os.path.basename(fn)
    print(u"Processing {}".format(bn))
    fn_qdf = u"{}.readable.pdf".format(fn)
    fn_tmp = u"{}.tempfile.pdf".format(fn)
    width, height, bleed, pages = gather_info()
    print(u"fn = {}, fn_tmp = {}, fn_qdf = {}, width = {}, height = {}, bleed = {}, pages = {}".format(fn, fn_tmp, fn_qdf, width, height, bleed, pages))

    # compute white rect width and height
    w, h = _white_rect_size(width, height, bleed, pages)
    print(u"w = {}, h = {}".format(w, h))

    try:
        # Convert to the readable (QDF) format. Arguments are passed as a list
        # so quotes or spaces in the filename cannot break or inject into a
        # shell command.
        result = subprocess.run(["qpdf", "--qdf", fn, fn_qdf])
        # qpdf exits with 3 when it only emitted warnings; the output file is
        # still written in that case, so only other non-zero codes are fatal.
        if result.returncode not in (0, 3):
            raise subprocess.CalledProcessError(result.returncode, result.args)

        # Resize the full-page rectangle to 0 × 0. The file is handled as bytes
        # because a QDF file may still contain binary stream data.
        needle = "{} {}".format(w, h).encode("ascii")
        with open(fn_qdf, "rb") as src:
            data = src.read()
        if needle not in data:
            print(
                "Warning: no '{} {}' rectangle found; nothing to remove".format(w, h),
                file=sys.stderr,
            )
        with open(fn_tmp, "wb") as dst:
            dst.write(data.replace(needle, b"0 0"))

        # Fix the PDF. Capture the output first so the original file is left
        # untouched if fix-qdf fails.
        fixed = subprocess.run(
            ["fix-qdf", fn_tmp], check=True, stdout=subprocess.PIPE
        ).stdout
        with open(fn, "wb") as out:
            out.write(fixed)
    except (OSError, subprocess.CalledProcessError) as err:
        sys.exit("Failed to process {}: {}".format(bn, err))
    finally:
        # remove temp files, even when a step above failed
        _remove_quietly(fn_qdf)
        _remove_quietly(fn_tmp)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment