# Last active April 21, 2019 06:06
# Export a Plone site into a directory tree of PDFs.
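# Usage:
#   1. Spin up a local instance.
#   2. In portal_workflows, allow everything for Anonymous, then click
#      "update portal security".
#   3. Run `bin/instance debug` and print the path of every catalogued object:
#        >>> for brain in app.Plone.portal_catalog():
#        ...     print brain.getURL().split('http://nohost/Plone/')[1]
#   4. Paste the printed paths into paths.txt, one path per line.
# More info on: (the reference link is missing from this copy of the script)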
from subprocess import check_output
import os.path
# Base URL of the locally running Plone site that is being exported.
PLONE_URL = "http://localhost:8080/Plone"

# Root of the directory tree that receives the exported files.
OUTPUT_DIR = "output"

# Chrome executable used for headless PDF rendering.
# NOTE(review): this is the location named in the original shell command
# (unescaped here, because commands are now passed as argument lists rather
# than shell strings). On a stock macOS install the binary usually lives
# inside the app bundle ("Google Chrome.app/Contents/MacOS/Google Chrome");
# confirm and adjust.
CHROME_BIN = "/Applications/Google Chrome"

# Paths ending in one of these are downloaded as-is instead of being printed.
DOWNLOAD_SUFFIXES = ("pdf", "jpg", "jpeg", "png")


def split_path(path):
    """Split a site-relative path into ``(folder, filename)``.

    ``folder`` is everything before the last ``/`` (empty for a top-level
    path) and ``filename`` is the last path segment.
    """
    folder, _, filename = path.rpartition("/")
    return folder, filename


def build_command(path):
    """Return the argument list that exports ``path`` into the current directory.

    Binary assets (see ``DOWNLOAD_SUFFIXES``) are fetched with ``wget``; every
    other path is rendered by headless Chrome into ``<filename>.pdf``.
    """
    _, filename = split_path(path)
    url = f"{PLONE_URL}/{path}"
    if filename.endswith(DOWNLOAD_SUFFIXES):
        return ["wget", url]
    # NOTE(review): the URL argument was missing from the damaged original;
    # it is assumed to be the same URL the wget branch uses. Confirm.
    return [
        CHROME_BIN,
        "--headless",
        "--disable-gpu",
        "--no-margins",
        "--run-all-compositor-stages-before-draw",
        f"--print-to-pdf={filename}.pdf",
        url,
    ]


def export_path(path):
    """Export a single site path below ``OUTPUT_DIR``.

    Returns ``False`` when the path was skipped because its output already
    exists, ``True`` after the export command ran. Raises
    ``subprocess.CalledProcessError`` if the export command exits non-zero.
    """
    folder, filename = split_path(path)
    target_dir = os.path.join(OUTPUT_DIR, folder)
    target = os.path.join(target_dir, filename)
    # A previous run leaves either the downloaded file itself or, for rendered
    # pages, "<filename>.pdf".
    # NOTE(review): the second check was truncated in the original and is
    # reconstructed from the --print-to-pdf output name. Confirm.
    if os.path.isfile(target) or os.path.isfile(target + ".pdf"):
        print(f"skipping: {folder}/{filename}")
        return False
    print(f"processing: {path}")
    os.makedirs(target_dir, exist_ok=True)
    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters from breaking or altering the command.
    check_output(build_command(path), cwd=target_dir)
    return True


def main(paths_file="paths.txt"):
    """Export every path listed in ``paths_file`` (one path per line)."""
    with open(paths_file, "r") as f:
        for line in f:
            # Strip only the line terminator: slicing off the last character
            # would corrupt a final line that has no trailing newline.
            path = line.rstrip("\r\n")
            if not path:
                continue  # ignore blank lines left over from pasting
            export_path(path)


if __name__ == "__main__":
    main()
