Skip to content

Instantly share code, notes, and snippets.

@yukkerike
Last active August 29, 2024 22:36
Show Gist options
  • Save yukkerike/0d5d26b614fc3ae8cf2890a69f56c595 to your computer and use it in GitHub Desktop.
Save yukkerike/0d5d26b614fc3ae8cf2890a69f56c595 to your computer and use it in GitHub Desktop.
Optimization script for images embedded in MS Office files on macOS
import os
import shutil
import subprocess
import sys
import tempfile
import zipfile
def unpack_file(file_path, extract_to):
    """Extract every member of the zip archive at *file_path*.

    Args:
        file_path: Path of the zip-based document to read.
        extract_to: Directory that receives the extracted tree.
    """
    # 'r' is ZipFile's default mode, so it is left implicit here.
    with zipfile.ZipFile(file_path) as archive:
        archive.extractall(path=extract_to)
def repack_file(extract_from, file_path):
    """Write the directory tree under *extract_from* into a new zip archive.

    Only regular files are added; each one is stored under its path relative
    to *extract_from* and compressed with DEFLATE.

    Args:
        extract_from: Root directory whose files are archived.
        file_path: Destination archive path (created or overwritten).
    """
    with zipfile.ZipFile(file_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, names in os.walk(extract_from):
            for name in names:
                source = os.path.join(current_dir, name)
                archive.write(source, arcname=os.path.relpath(source, extract_from))
def optimize_images(images_folder):
    """Run the ImageOptim executable on *images_folder*.

    The tool's stdout and stderr are discarded and its exit status is not
    inspected.

    Args:
        images_folder: Directory passed to ImageOptim as its only argument.
    """
    command = [
        '/Applications/ImageOptim.app/Contents/MacOS/ImageOptim',
        images_folder,
    ]
    subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def remove_tmp_dir(tmpdir):
    """Delete *tmpdir* and everything beneath it.

    Uses ``shutil.rmtree`` instead of a manual bottom-up walk: the manual
    version called ``os.rmdir`` on every directory entry and therefore failed
    on symlinks that point at directories. ``rmtree`` removes such links
    without following them.

    Args:
        tmpdir: Directory to remove.

    Raises:
        OSError: If the directory does not exist or cannot be removed.
    """
    shutil.rmtree(tmpdir)
def process_file(file_path):
    """Optimize the images embedded in one Office Open XML document.

    The document is unpacked into a temporary directory. If its media folder
    exists, ImageOptim is run on that folder, the original file is handed to
    the external ``trash`` command, and the optimized tree is zipped back to
    the original path. Files whose extension is not .docx/.pptx/.xlsx are
    ignored without touching the filesystem.

    Args:
        file_path: Path of the document to optimize in place.
    """
    # Folder inside the package under which each application keeps "media/".
    # Office writes "ppt" and "xl" (not "powerpoint"/"excel") for these formats.
    media_roots = {".docx": "word", ".pptx": "ppt", ".xlsx": "xl"}
    media_root = next(
        (folder for ext, folder in media_roots.items() if file_path.endswith(ext)),
        None,
    )
    if media_root is None:
        # Unsupported type: nothing to optimize, so skip the costly unpack.
        return

    tmpdir = tempfile.mkdtemp()
    try:
        unpack_file(file_path, tmpdir)
        images_folder = os.path.join(tmpdir, media_root, 'media')
        if os.path.isdir(images_folder):
            optimize_images(images_folder)
            # Hand the original to `trash` before the path is overwritten, so
            # a copy of the untouched document is kept by that tool.
            subprocess.call(['trash', file_path])
            repack_file(tmpdir, file_path)
    finally:
        # Always clean up, even when unpacking or an external tool fails.
        remove_tmp_dir(tmpdir)
def print_progress_bar(index, total, bar_length=40):
    """Redraw a one-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite each
    other, and no trailing newline is written.

    Args:
        index: Number of items completed so far.
        total: Total number of items.
        bar_length: Width of the bar in characters.
    """
    # An empty work list would divide by zero; treat it as already complete.
    progress = index / total if total > 0 else 1.0
    # Clamp so an out-of-range index can never draw past the bar's width.
    progress = min(max(progress, 0.0), 1.0)
    block = int(round(bar_length * progress))
    bar = '#' * block + '-' * (bar_length - block)
    sys.stdout.write(f"\rProcessing files: [{bar}] {index}/{total}")
    sys.stdout.flush()
def scan_and_process_folder(folder_path):
    """Recursively optimize every supported Office document under a folder.

    Collects all .docx/.pptx/.xlsx files below *folder_path*, then processes
    them one by one, printing a progress bar and a per-file message.

    Args:
        folder_path: Root directory to scan.
    """
    supported_extensions = (".docx", ".pptx", ".xlsx")
    # Office creates "~$<name>" owner/lock files next to open documents. They
    # carry the document's extension but are not zip archives, so unpacking
    # one would abort the whole run; leave them out.
    lock_file_prefix = "~$"
    files_to_process = [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(folder_path)
        for filename in files
        if filename.endswith(supported_extensions)
        and not filename.startswith(lock_file_prefix)
    ]
    total_files = len(files_to_process)
    for index, file_path in enumerate(files_to_process, start=1):
        process_file(file_path)
        print_progress_bar(index, total_files)
        print(f"\nProcessed and optimized {file_path}")
if __name__ == "__main__":
    # Exactly one positional argument is expected: the folder to scan.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python optimize_office_files_with_imageoptim.py <folder_path>")
        sys.exit(1)
    scan_and_process_folder(cli_args[0])
@yukkerike
Copy link
Author

Dependencies

brew install trash
brew install --cask imageoptim

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment