-
-
Save mrpnkt/4b4654c6ead0a400cadaef6f214a9c18 to your computer and use it in GitHub Desktop.
A very bare-bones PDF metadata-clobberer Python script.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# encoding: utf-8
"""
strip-pdf-metadata.py
Created by FI$H 2000 on 2012-01-17.
Copyright (c) 2012 Objects In Space And Time, LLC. All rights reserved.
See also:
http://stackoverflow.com/questions/2574676/change-metadata-of-pdf-file-with-pypdf
http://two.pairlist.net/pipermail/reportlab-users/2009-November/009033.html
http://pybrary.net/pyPdf/
"""
import sys | |
import os | |
from pyPdf import PdfFileWriter, PdfFileReader | |
from pyPdf.generic import NameObject, createStringObject | |
def main():
    """Copy every page of the input PDFs into one PDF with placeholder metadata.

    Reads each file listed in INPUTS, appends all of its pages to a single
    PdfFileWriter whose info dictionary has been overwritten with fixed
    /Title, /Author, /Subject and /Creator values, and writes the result
    to OUTPUT in the current working directory.
    """
    OUTPUT = 'document-sanitized.pdf'
    INPUTS = ['document.pdf',]

    # There is no interface through pyPDF with which to set this other than
    # getting your hands dirty like so:
    output = PdfFileWriter()
    infoDict = output._info.getObject()
    infoDict.update({
        NameObject('/Title'): createStringObject(u'title'),
        NameObject('/Author'): createStringObject(u'author'),
        NameObject('/Subject'): createStringObject(u'subject'),
        NameObject('/Creator'): createStringObject(u'a script'),
    })

    # Keep every input handle open until the output has been written: the
    # original only ever wrote while the inputs were still open, and the
    # writer may still pull page data from them at write time.
    handles = []
    try:
        for path in INPUTS:
            handle = open(path, "rb")
            handles.append(handle)
            reader = PdfFileReader(handle)
            for page in range(reader.getNumPages()):
                output.addPage(reader.getPage(page))

        # `with` guarantees the output file is closed even if write() raises.
        with open(OUTPUT, 'wb') as outputStream:
            output.write(outputStream)
    finally:
        for handle in handles:
            handle.close()
# Run the sanitizer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment