Skip to content

Instantly share code, notes, and snippets.

@jiankaiwang
Created September 10, 2018 03:32
Show Gist options
  • Save jiankaiwang/395b82da9a47ddc74aefbcd90a57303e to your computer and use it in GitHub Desktop.
Save jiankaiwang/395b82da9a47ddc74aefbcd90a57303e to your computer and use it in GitHub Desktop.
Secure PDF files with a password in Python
# -*- coding: utf-8 -*-
"""
Created on Tue May 8 21:13:47 2018
@author: JianKai Wang
"""
import PyPDF2
import os
import sys
import codecs
# Directory layout, all relative to the current working directory:
#   ./raw       unprotected input PDFs
#   ./password  encrypted output PDFs (created on demand)
#   ./map.csv   mapping file; column 0 is the e-mail, column 1 the PDF "no"
basedir = os.path.join('.')
rawdir = os.path.join(basedir, 'raw')
passdir = os.path.join(basedir, 'password')
mapfile = os.path.join(basedir, 'map.csv')

if not os.path.isfile(mapfile):
    print('Error: No mapping file.')
    # Non-zero status so that shells and schedulers can see the failure.
    sys.exit(1)

if not os.path.isdir(rawdir):
    # os.walk() yields nothing for a missing directory, so the next() call
    # below would otherwise die with a bare StopIteration.
    print('Error: No raw pdf directory.')
    sys.exit(1)

if not os.path.isdir(passdir):
    os.mkdir(passdir)

# Files directly inside rawdir (no recursion). Only *.pdf entries are kept
# so that stray files (.DS_Store, Thumbs.db, ...) do not skew the count that
# is later compared against the mapping file.
allPDFFile = [
    name for name in next(os.walk(rawdir))[2]
    if name.lower().endswith('.pdf')
]
def extractPwd(email):
    """Return the text of *email* that precedes the first '@'.

    The whole string is returned when it contains no '@'.
    """
    local_part, _sep, _domain = email.partition('@')
    return local_part
def checkPDFExistAndAddToDict(pdfname, email):
    """Register the password for ``<pdfname>.pdf`` if that file exists.

    The file is looked up inside the module-level ``rawdir``. When it is
    present, the module-level ``mapDict`` gets an entry keyed by the file
    name (including the ``.pdf`` suffix) whose value is the password derived
    from *email* via ``extractPwd``. When it is absent, an error line is
    printed and ``mapDict`` is left untouched.
    """
    pdf_filename = pdfname + '.pdf'
    candidate = os.path.join(rawdir, pdf_filename)

    if not os.path.isfile(candidate):
        message = 'Error: Lose pdf file {} and the corresponding email is {}.'
        print(message.format(pdf_filename, email))
        return

    mapDict[pdf_filename] = extractPwd(email)
# Maps "<no>.pdf" -> password. Entries are added by
# checkPDFExistAndAddToDict() only for PDFs that actually exist in rawdir.
mapDict = {}

# 'utf-8-sig' decodes plain UTF-8 as well, but additionally drops a leading
# byte-order mark so it cannot end up inside the first field of the file.
with codecs.open(mapfile, 'r', 'utf-8-sig') as fin:
    for line in fin:
        if not line.strip():
            # blank line (e.g. trailing newline at end of file)
            continue
        tmpList = [field.strip() for field in line.split(',')]
        if len(tmpList) < 2:
            # Not enough columns to hold both the e-mail and the "no".
            print('Error: Malformed line in mapping file: {}'
                  .format(line.strip()))
            continue
        if tmpList[1] == 'no':
            # header
            continue
        # Keys in mapDict carry the '.pdf' suffix, so the duplicate check
        # has to add it before testing membership.
        if tmpList[1] + '.pdf' in mapDict:
            print('Error: There is the same no.')
            continue
        checkPDFExistAndAddToDict(tmpList[1], tmpList[0])

if len(allPDFFile) != len(mapDict):
    print('Warning: The amount of pdf files is not equal to the map file.')
    print('Warning: Total PDF file is {}, and total listed email is {}.'
          .format(len(allPDFFile), len(mapDict)))
# Encrypt every mapped PDF from rawdir into passdir, using the mapped
# password as both the user and the owner password.
for pdf_name, password in mapDict.items():
    input_file = os.path.join(rawdir, pdf_name)
    filename = os.path.basename(input_file)
    output_file = os.path.join(passdir, "temp_" + filename)
    new_output_file = os.path.join(passdir, filename)

    output = PyPDF2.PdfFileWriter()
    # Both handles are closed even if PyPDF2 raises. The input stream has to
    # stay open until write() has finished, because the writer pulls page
    # data from the reader's stream at that point.
    with open(input_file, "rb") as source, \
            open(output_file, "wb") as outputStream:
        input_stream = PyPDF2.PdfFileReader(source)
        for i in range(input_stream.getNumPages()):
            output.addPage(input_stream.getPage(i))
        # Set user and owner password to pdf file
        output.encrypt(password, password, use_128bit=True)
        output.write(outputStream)

    # Replace a previous result only after the new file has been written
    # completely, so a failure above never destroys an existing output.
    # The explicit remove is needed because os.rename() refuses to
    # overwrite an existing file on Windows.
    if os.path.isfile(new_output_file):
        os.remove(new_output_file)
    # Move the temporary file to its final name.
    os.rename(output_file, new_output_file)

print("Securing pdf files is complete.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment