Created
September 25, 2022 15:24
-
-
Save ajoh504/662ad1ccd4c6e3ff1576ea09d46b415c to your computer and use it in GitHub Desktop.
PDF Paranoia - Automate the Boring Stuff With Python - CH 15
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python3 | |
# pdfparanoia.py - Walk through a directory and its subdirectories, and encrypt | |
# every PDF file. Provide the encryption password via command | |
# line and the filepath to search through. Check to see if | |
# encryption succeeded by attempting to read and decrypt the | |
# files. Prompt the user to delete the older unencrypted files. | |
# | |
# USAGE: sys.argv[1] = filepath to search for PDFs | |
# Using Python 3.10 and PyPDF2 v2.10.0 | |
# | |
# SOURCE: https://automatetheboringstuff.com/2e/chapter15/ | |
import getpass
import os
import sys
from typing import Callable, Iterator

import PyPDF2
class PdfEncryptor:
    """Encrypt PDF files with a single password and check the result.

    The password is stored once at construction and reused by every
    encrypt/decrypt call made through this object.
    """

    def __init__(self, password):
        """
        :param password: password passed to every encrypt/decrypt call.
        """
        self._password = password

    def encrypt(self, pdf_writer):
        """Call ``pdf_writer.encrypt`` with the stored password."""
        pdf_writer.encrypt(self._password)

    def decrypt(self, pdf_reader):
        """
        Call ``pdf_reader.decrypt`` with the stored password.

        :return: whatever ``pdf_reader.decrypt`` returns, so the caller can
                 tell whether the password was accepted.
        """
        return pdf_reader.decrypt(self._password)

    @staticmethod
    def _encrypted_name(pdf_file: str) -> str:
        """Return the output path: *pdf_file* minus its extension, plus ``_encrypted.pdf``."""
        # splitext only strips the final extension, so dots elsewhere in the
        # path ("./docs/v1.2/report.pdf", "archive.2022.pdf") are preserved.
        stem, _extension = os.path.splitext(pdf_file)
        return stem + "_encrypted.pdf"

    def encrypt_pdf(self, pdf_file: str) -> None:
        """
        Write an encrypted copy of *pdf_file* next to the original.

        :param pdf_file: Input PDF file name from the calling function (search_for_pdf()).
                         If the PDF is not encrypted, its pages are copied into a
                         PdfFileWriter, encrypted, and saved as a new file named
                         by _encrypted_name(). Encrypted files are skipped.
        """
        # The file is opened here rather than by PyPDF2 so that closing it is
        # guaranteed by the ``with`` statement.
        with open(pdf_file, "rb") as source:
            reader = PyPDF2.PdfFileReader(source)
            if reader.isEncrypted:
                return
            print(f"Encrypting {pdf_file}")
            writer = PyPDF2.PdfFileWriter()
            for page in reader.pages:
                writer.addPage(page)
            self.encrypt(writer)
            # Written while ``source`` is still open: the pages added above
            # still refer to the reader.
            with open(self._encrypted_name(pdf_file), "wb") as target:
                writer.write(target)

    def check_can_decrypt(self, pdf_file: str) -> None:
        """
        Check to see if encryption succeeded by reading and decrypting the file.

        :param pdf_file: path of the PDF to check. Files that are not
                         encrypted produce no "is encrypted" output and are
                         not passed to decrypt().
        """
        with open(pdf_file, "rb") as source:
            reader = PyPDF2.PdfFileReader(source)
            try:
                reader.getPage(0)
            except PyPDF2.errors.FileNotDecryptedError:
                print(
                    f"Encryption check 1: {pdf_file} is encrypted and cannot be read."
                )
            if not reader.isEncrypted:
                return
            print(
                f"Encryption check 2: isEncrypted() method returns True when called on {pdf_file}."
            )
            print(f"{pdf_file} is encrypted.\n")
            # NOTE(review): assumes decrypt() returns a falsy value when the
            # password is rejected - confirm against the installed PyPDF2.
            if not self.decrypt(reader):
                print(f"Decryption check failed: the password does not open {pdf_file}.\n")
def search_for_pdf(dir_to_search: str) -> Iterator[str]:
    """
    Yield the path of every PDF file found under *dir_to_search*.

    :param dir_to_search: input directory tree to walk through
    :return: generator of paths, each one the walked folder joined with the
             file name, so files in subdirectories can be opened directly.
             A file counts as a PDF when its name ends in ".pdf"
             (case-insensitive).
    """
    print(f"Searching for .pdf files in: {dir_to_search}")
    for folder, _sub_folders, file_list in os.walk(dir_to_search):
        for file in file_list:
            # endswith() rather than split(".")[-1]: a file named just "pdf"
            # has no extension and must not match.
            if file.lower().endswith(".pdf"):
                yield os.path.join(folder, file)
def delete_file(pdf_file: str) -> None:
    """
    Optionally delete unencrypted files after encryption check.

    :param pdf_file: path of the PDF to inspect. The file is removed only
                     when PyPDF2 reports it as not encrypted; encrypted files
                     are left in place.
    """
    # Open the file ourselves so the handle is closed before os.remove()
    # runs; removing a file that is still open fails on some platforms.
    with open(pdf_file, "rb") as source:
        encrypted = PyPDF2.PdfFileReader(source).isEncrypted
    if not encrypted:
        os.remove(pdf_file)
def main():
    """
    Search for PDF files, prompt user to enter encryption password, then encrypt
    the files. Attempt to read / decrypt the files to ensure encryption succeeded,
    then prompt user to delete unencrypted files and exit.

    Command line:
        pdfparanoia.py <directory>              password is prompted for (hidden input)
        pdfparanoia.py <password> <directory>   original two-argument form, still accepted

    Always ends by raising SystemExit, either after the Y/N prompt is
    answered or with a message when the arguments are unusable.
    """
    arguments = sys.argv[1:]
    if len(arguments) == 1:
        (dir_to_search,) = arguments
        # Prompting keeps the password out of the command line itself.
        password = getpass.getpass("Encryption password: ")
    elif len(arguments) == 2:
        password, dir_to_search = arguments
    else:
        sys.exit("Usage: pdfparanoia.py [password] <directory to search>")
    if not password:
        sys.exit("A non-empty encryption password is required.")

    encryptor = PdfEncryptor(password)
    for pdf in search_for_pdf(dir_to_search):
        encryptor.encrypt_pdf(pdf)

    print("\nSearching for encrypted files to check.\n")
    for pdf in search_for_pdf(dir_to_search):
        encryptor.check_can_decrypt(pdf)

    # Keep asking until the answer is Y or N; anything else re-prompts.
    while True:
        choice = input("\nDo you wish to delete all unencrypted files? Y/N\n")
        answer = choice.strip().upper()
        if answer == "Y":
            for pdf in search_for_pdf(dir_to_search):
                delete_file(pdf)
            sys.exit()
        if answer == "N":
            sys.exit()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Changes were made based on a code review at Stack Exchange