Skip to content

Instantly share code, notes, and snippets.

@ajoh504
Created September 25, 2022 15:24
Show Gist options
  • Save ajoh504/662ad1ccd4c6e3ff1576ea09d46b415c to your computer and use it in GitHub Desktop.
Save ajoh504/662ad1ccd4c6e3ff1576ea09d46b415c to your computer and use it in GitHub Desktop.
PDF Paranoia - Automate the Boring Stuff With Python - CH 15
#!python3
# pdfparanoia.py - Walk through a directory and its subdirectories, and encrypt
# every PDF file. Provide the encryption password via command
# line and the filepath to search through. Check to see if
# encryption succeeded by attempting to read and decrypt the
# files. Prompt the user to delete the older unencrypted files.
#
# USAGE: python pdfparanoia.py <password> <directory>
#        sys.argv[1] = encryption password
#        sys.argv[2] = directory to search for PDFs
# Using Python 3.10 and PyPDF2 v2.10.0
#
# SOURCE: https://automatetheboringstuff.com/2e/chapter15/
import getpass
import os
import sys
from collections.abc import Iterator
from typing import Callable

import PyPDF2
class PdfEncryptor:
    """Encrypt PDF files with one password and verify the result.

    The password is supplied once at construction and reused for every
    encrypt and decrypt operation performed through this object.
    """

    def __init__(self, password):
        """Store *password* for all later encrypt / decrypt calls."""
        self._password = password

    def encrypt(self, pdf_writer):
        """Call ``pdf_writer.encrypt`` with the stored password."""
        pdf_writer.encrypt(self._password)

    def decrypt(self, pdf_reader):
        """Call ``pdf_reader.decrypt`` with the stored password.

        :return: whatever ``pdf_reader.decrypt`` returns, so callers can tell
                 whether the password was accepted (PyPDF2 documents a falsy
                 result for a rejected password).
        """
        return pdf_reader.decrypt(self._password)

    @staticmethod
    def _encrypted_path(pdf_file: str) -> str:
        """Return the output path for the encrypted copy of *pdf_file*.

        Only the final extension is removed, so dots elsewhere in the path
        (``./docs/report.v2.pdf``) are preserved.
        """
        stem, _extension = os.path.splitext(pdf_file)
        return stem + "_encrypted.pdf"

    def encrypt_pdf(self, pdf_file: str) -> None:
        """
        :param pdf_file: Input PDF file name from the calling function (search_for_pdf()).
        If the PDF is not encrypted, copy its pages into a new writer, encrypt
        it and save it next to the original as ``<name>_encrypted.pdf``.
        Encrypted files will be skipped.
        """
        # The reader is handed an explicitly opened stream so the file handle
        # is closed deterministically, independent of the PyPDF2 version.
        with open(pdf_file, "rb") as source:
            reader = PyPDF2.PdfReader(source)
            if reader.is_encrypted:
                return
            print(f"Encrypting {pdf_file}")
            writer = PyPDF2.PdfWriter()
            for page in reader.pages:
                writer.add_page(page)
            self.encrypt(writer)
            # Pages are read lazily from `source`, so the output must be
            # written while the input stream is still open.
            with open(self._encrypted_path(pdf_file), "wb") as target:
                writer.write(target)

    def check_can_decrypt(self, pdf_file: str) -> None:
        """Check to see if encryption succeeded by reading and decrypting the files.

        :param pdf_file: path of the PDF to inspect.
        Prints the outcome of each check; nothing is returned.
        """
        with open(pdf_file, "rb") as source:
            reader = PyPDF2.PdfReader(source)
            try:
                # Reading a page of a still-locked file is expected to fail.
                _ = reader.pages[0]
            except PyPDF2.errors.FileNotDecryptedError:
                print(
                    f"Encryption check 1: {pdf_file} is encrypted and cannot be read."
                )
            if reader.is_encrypted:
                print(
                    f"Encryption check 2: isEncrypted() method returns True when called on {pdf_file}."
                )
                print(f"{pdf_file} is encrypted.\n")
                # A falsy result means the stored password did not unlock the
                # file, i.e. it was encrypted with a different password.
                if not self.decrypt(reader):
                    print(
                        f"WARNING: {pdf_file} could not be decrypted with the supplied password.\n"
                    )
def search_for_pdf(dir_to_search: str) -> Iterator[str]:
    """
    Walk a directory tree and yield the path of every PDF file found.

    :param dir_to_search: input directory tree to walk through
    :return: generator of paths, each joined with the folder it was found in,
             so files in subdirectories can be opened by the caller.
    """
    print(f"Searching for .pdf files in: {dir_to_search}")
    for folder, _sub_folders, file_list in os.walk(dir_to_search):
        for file_name in file_list:
            # Compare the real extension, case-insensitively; a file that is
            # merely named "pdf" has no extension and is not a match.
            if os.path.splitext(file_name)[1].lower() == ".pdf":
                yield os.path.join(folder, file_name)
def delete_file(pdf_file: str) -> None:
    """Optionally delete unencrypted files after encryption check.

    :param pdf_file: path of the PDF to inspect; it is removed from disk only
                     if the reader reports it as not encrypted.
    """
    # Open the file explicitly so the handle is closed before os.remove();
    # an open handle blocks deletion on some platforms.
    with open(pdf_file, "rb") as source:
        encrypted = PyPDF2.PdfReader(source).is_encrypted
    if not encrypted:
        os.remove(pdf_file)
def main():
    """
    Search for PDF files, obtain the encryption password, then encrypt the
    files. Attempt to read / decrypt the files to ensure encryption succeeded,
    then prompt user to delete unencrypted files and exit.

    Command line forms:
        pdfparanoia.py <password> <directory>
        pdfparanoia.py <directory>      (password is prompted for, not echoed)

    Exits with an error message when the arguments are missing or the
    directory does not exist.
    """
    args = sys.argv[1:]
    if len(args) == 2:
        password, dir_to_search = args
    elif len(args) == 1:
        # Prompting keeps the password out of shell history and process lists.
        dir_to_search = args[0]
        password = getpass.getpass("Encryption password: ")
    else:
        sys.exit("USAGE: pdfparanoia.py [password] <directory>")

    if not os.path.isdir(dir_to_search):
        sys.exit(f"Not a directory: {dir_to_search}")

    encryptor = PdfEncryptor(password)
    for pdf in search_for_pdf(dir_to_search):
        encryptor.encrypt_pdf(pdf)

    print("\nSearching for encrypted files to check.\n")
    for pdf in search_for_pdf(dir_to_search):
        encryptor.check_can_decrypt(pdf)

    # Keep asking until the user gives an explicit yes or no.
    while True:
        choice = input("\nDo you wish to delete all unencrypted files? Y/N\n")
        choice = choice.strip().upper()
        if choice == "Y":
            for pdf in search_for_pdf(dir_to_search):
                delete_file(pdf)
            sys.exit()
        if choice == "N":
            sys.exit()


if __name__ == "__main__":
    main()
@ajoh504
Copy link
Author

ajoh504 commented Sep 25, 2022

Changes were made based on a code review at Stack Exchange

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment