# Last active May 30, 2023 03:48
# Modify PDF metadata with Python
# Some more explanation was linked here (the URL is missing from this copy).
import datetime
import fitz # install PyMuPDF
# Script body: remove every annotation, tidy the date entries of the document
# information dictionary, delete the XML-based metadata object and save the
# result under a new file name.
# NOTE(review): this copy of the script was damaged (the open/save calls, the
# `try:` and the annotation-delete call were missing); those statements are
# reconstructed from the surrounding code and the PyMuPDF documentation.
with fitz.open("existing_document.pdf") as doc:
    for idx, page in enumerate(doc):
        # delete_annot() returns the annotation following the deleted one
        # (None at the end), so the page's annotations are walked without
        # iterating over a collection that is being modified.
        annot = page.first_annot
        while annot:
            annot = page.delete_annot(annot)
        print(f'annotations from page {idx + 1} removed')

    # Changing the doc.metadata dict alone is not written to the file; the
    # values have to be handed back through set_metadata(). "format" and
    # "encryption" cannot be set, so they are left out of the update.
    metadata = {
        key: value
        for key, value in (doc.metadata or {}).items()
        if key not in ("format", "encryption")
    }

    # use more recent date format (no trailing apostrophe); a missing or
    # empty creation date is left as it is
    creation_date = metadata.get("creationDate") or ""
    if creation_date.endswith("'"):
        metadata["creationDate"] = creation_date[:-1]

    # funky ISO 32000-1:2008 date format: "D:" prefix, UTC marked by "Z"
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    metadata["modDate"] = now_utc.strftime("D:%Y%m%d%H%M%SZ00'00")
    doc.set_metadata(metadata)

    # Delete the XML-based metadata object, if present (the link that
    # explained this step is missing from this copy).
    doc.del_xml_metadata()

    # garbage > 0 is needed for the deleted annotations to be physically
    # removed from the saved file.
    doc.save("spiffy_new_document.pdf", garbage=4)
