Last active
August 29, 2024 22:36
-
-
Save yukkerike/0d5d26b614fc3ae8cf2890a69f56c595 to your computer and use it in GitHub Desktop.
Optimization script for images embedded in MS Office files on macOS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import shutil
import subprocess
import sys
import tempfile
import zipfile
def unpack_file(file_path, extract_to):
    """Extract every member of the ZIP archive at ``file_path`` into ``extract_to``.

    Office documents (.docx/.pptx/.xlsx) are ZIP containers, so this exposes
    their internal files as a regular directory tree.
    """
    with zipfile.ZipFile(file_path, mode='r') as archive:
        archive.extractall(path=extract_to)
def repack_file(extract_from, file_path):
    """Create a deflate-compressed ZIP at ``file_path`` from a directory tree.

    Every regular file found under ``extract_from`` is stored under its path
    relative to ``extract_from``. Any existing file at ``file_path`` is
    overwritten.

    Args:
        extract_from: Root directory whose files are added to the archive.
        file_path: Destination path of the archive to write.
    """
    # If the destination lives inside the tree being packed, os.walk would
    # list the half-written archive itself; remember it so it can be skipped.
    target = os.path.abspath(file_path)
    with zipfile.ZipFile(file_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for root, _dirs, files in os.walk(extract_from):
            for name in files:
                # Use a distinct name so the ``file_path`` parameter is not
                # shadowed by the member currently being written.
                member_path = os.path.join(root, name)
                if os.path.abspath(member_path) == target:
                    continue
                arcname = os.path.relpath(member_path, extract_from)
                archive.write(member_path, arcname)
def optimize_images(images_folder):
    """Run the ImageOptim macOS application on ``images_folder``.

    Blocks until the ImageOptim process exits. Its stdout and stderr are
    discarded and its exit status is ignored.
    """
    command = [
        '/Applications/ImageOptim.app/Contents/MacOS/ImageOptim',
        images_folder,
    ]
    subprocess.call(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def remove_tmp_dir(tmpdir):
    """Recursively delete ``tmpdir`` and everything inside it.

    Uses ``shutil.rmtree`` rather than a manual bottom-up walk: the manual
    version called ``os.rmdir`` on every entry that ``os.walk`` reports as a
    directory, which fails for symlinks pointing at directories. ``rmtree``
    removes such links without following them.

    Raises:
        FileNotFoundError: If ``tmpdir`` does not exist.
        OSError: If an entry cannot be removed.
    """
    shutil.rmtree(tmpdir)
def process_file(file_path):
    """Optimize the images embedded in one Office document, in place.

    The document is unpacked into a temporary directory. If it contains a
    media folder, that folder is handed to ImageOptim, the original document
    is moved to the trash, and a new archive is written to the same path.
    Documents without a media folder are left untouched.

    Args:
        file_path: Path to a .docx, .pptx or .xlsx file.
    """
    # Top-level folder inside the package that holds the "media" directory.
    # Office writes word/media, ppt/media and xl/media respectively.
    package_roots = {".docx": "word", ".pptx": "ppt", ".xlsx": "xl"}
    package_root = next(
        (root for ext, root in package_roots.items() if file_path.endswith(ext)),
        None,
    )

    tmpdir = tempfile.mkdtemp()
    try:
        unpack_file(file_path, tmpdir)
        if package_root:
            images_folder = os.path.join(tmpdir, package_root, 'media')
            if os.path.exists(images_folder):
                optimize_images(images_folder)
                # The trashed copy is the only backup of the original; the
                # repack below overwrites file_path.
                subprocess.call(['trash', file_path])
                repack_file(tmpdir, file_path)
    finally:
        # Always clean up, even if unpacking or an external tool fails.
        remove_tmp_dir(tmpdir)
def print_progress_bar(index, total, bar_length=40):
    """Redraw a single-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite each
    other; no trailing newline is written.

    Args:
        index: Number of items completed so far.
        total: Total number of items. A value of zero or less is drawn as a
            full bar instead of raising ``ZeroDivisionError``.
        bar_length: Width of the bar in characters.
    """
    progress = index / total if total > 0 else 1.0
    # Clamp so an out-of-range index can never over- or under-fill the bar.
    filled = min(max(round(bar_length * progress), 0), bar_length)
    bar = '#' * filled + '-' * (bar_length - filled)
    sys.stdout.write(f"\rProcessing files: [{bar}] {index}/{total}")
    sys.stdout.flush()
def scan_and_process_folder(folder_path):
    """Recursively find Office documents under ``folder_path`` and process each.

    Files ending in .docx, .pptx or .xlsx are collected first so the total is
    known, then each is passed to ``process_file`` while a progress bar and a
    per-file message are printed.

    Args:
        folder_path: Root directory to scan.
    """
    supported_extensions = (".docx", ".pptx", ".xlsx")
    # "~$name.docx" is an Office owner/lock file and "._name.docx" is a macOS
    # AppleDouble sidecar. Both match the extension filter but are not ZIP
    # archives, so unpacking one would abort the whole run.
    ignored_prefixes = ("~$", "._")

    files_to_process = []
    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            if filename.startswith(ignored_prefixes):
                continue
            if filename.endswith(supported_extensions):
                files_to_process.append(os.path.join(root, filename))

    total_files = len(files_to_process)
    for index, file_path in enumerate(files_to_process, start=1):
        process_file(file_path)
        print_progress_bar(index, total_files)
        print(f"\nProcessed and optimized {file_path}")
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the folder to scan.
    if len(sys.argv) != 2:
        print("Usage: python optimize_office_files_with_imageoptim.py <folder_path>")
        sys.exit(1)
    folder_path = sys.argv[1]
    if not os.path.isdir(folder_path):
        # os.walk yields nothing for a missing or non-directory path, so
        # without this check a typo would exit silently with status 0.
        print(f"Error: '{folder_path}' is not a directory", file=sys.stderr)
        sys.exit(1)
    scan_and_process_folder(folder_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dependencies