Skip to content

Instantly share code, notes, and snippets.

@joemerante
Last active May 30, 2023 03:48
Show Gist options
  • Save joemerante/6d30ed62edf7ac0eda124a8d262f160e to your computer and use it in GitHub Desktop.
Save joemerante/6d30ed62edf7ac0eda124a8d262f160e to your computer and use it in GitHub Desktop.
Modify PDF metadata with Python
# some more explanation at https://joemerante.blogspot.com/2020/01/changing-pdf-metadata-with-python.html
import datetime
import fitz # install PyMuPDF
# Strip all annotations from a PDF, normalise its date metadata, and write
# the result to a new file.
# NOTE(review): the PyMuPDF calls below keep the camelCase names the script
# was written against (deleteAnnot, setMetadata, _delXmlMetadata); newer
# PyMuPDF releases spell these in snake_case -- confirm against the installed
# version.
doc = fitz.open("existing_document.pdf")

for idx, page in enumerate(doc):
    # Ask for a fresh annots() generator before every deletion instead of
    # deleting while iterating over one; the None sentinel ends the loop
    # once the page has no annotations left.
    while True:
        annot = next(page.annots(), None)
        if annot is None:
            break
        page.deleteAnnot(annot)
    print(f'annotations from page {idx + 1} removed')

# Work on one reference so the edits below are exactly what gets passed to
# setMetadata().
metadata = doc.metadata

# use more recent date format
# https://stackoverflow.com/questions/41661477/what-is-the-correct-format-of-a-date-string
# The value may be missing or empty, so normalise it to '' before testing
# for the trailing apostrophe.
creation_date = metadata.get('creationDate') or ''
if creation_date.endswith("'"):
    metadata['creationDate'] = creation_date[:-1]

# funky ISO 32000-1:2008 date format: D:YYYYMMDDHHmmSSOHH'mm
# The "D:" prefix is part of that format, and "Z00'00" marks the time as UTC.
now_utc = datetime.datetime.now(datetime.timezone.utc)
metadata['modDate'] = now_utc.strftime("D:%Y%m%d%H%M%SZ00'00")
doc.setMetadata(metadata)

# see https://pymupdf.readthedocs.io/en/latest/document/#setmetadata-example
# Remove the XML metadata as well, following the linked example.
doc._delXmlMetadata()

# garbage=4 turns on PyMuPDF's garbage collection while saving (see the
# Document.save documentation for what each level does).
doc.save("spiffy_new_document.pdf", garbage=4)

# Release the file handle now that the cleaned copy has been written.
doc.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment