Skip to content

Instantly share code, notes, and snippets.

@fabianod
Forked from another-junior-dev/image_mod.py
Created March 7, 2019 00:45
A script to extract images from .zip, .docx, .xlsx, and .pptx
#!/usr/bin/python3
import os
import shutil
import logging
import zipfile
import argparse
import tempfile
from pathlib import Path
# File-name suffixes (matched case-insensitively) that are treated as images.
_IMAGE_SUFFIXES = ('.png', '.jpeg', '.jpg', '.gif', '.bmp', '.tif', '.tiff')


def extract_images(filepath, destination):
    """Copy every image stored inside a zip-based file into ``destination``.

    Office Open XML documents (docx, xlsx, pptx) are zip archives, so they
    are handled exactly like a plain ``.zip``.

    Args:
        filepath: Path to the archive (``pathlib.Path`` or ``str``).
        destination: Existing directory that receives the images. Images are
            copied flat, by base name, so archive members that share a base
            name overwrite one another.

    Returns:
        ``[image_count, total_uncompressed_bytes]`` when ``filepath`` is a
        zip archive, otherwise a one-element list holding the file's suffix.
    """
    filepath = Path(filepath)
    destination_path = Path(destination)

    if not zipfile.is_zipfile(filepath):
        logging.info('File is a {}'.format(filepath.suffix))
        logging.error('File cannot be accessed, must have the following extensions: zip, docx, xlsx, pptx')
        return [filepath.suffix]

    logging.info('The default path for temporary directories and files is %s', tempfile.gettempdir())

    overall_size = 0
    # Open the archive once and close it deterministically.
    with zipfile.ZipFile(filepath) as archive:
        image_members = [
            info for info in archive.infolist()
            if not info.is_dir() and info.filename.lower().endswith(_IMAGE_SUFFIXES)
        ]
        with tempfile.TemporaryDirectory() as working_dir:
            logging.info('Created temporary directory "%s"', Path(working_dir).name)
            for info in image_members:
                # Extract INTO the temporary directory; without ``path`` the
                # member tree would be written to the current working
                # directory and left behind.
                extracted = archive.extract(info, path=working_dir)
                logging.info('Extracted %s', info.filename)
                shutil.copy(extracted, destination_path)
                logging.info('Copied %s', info.filename)
                overall_size += info.file_size
            logging.info('Extracted all image files')

    logging.info('Copied all image files to %s', destination_path.resolve())
    return [len(image_members), overall_size]
def main():
    """Command-line entry point.

    Parses ``filepath`` and the optional ``-d/--destination`` (default: the
    current directory), validates both, runs ``extract_images`` and prints a
    one-line summary of the outcome.
    """
    parser = argparse.ArgumentParser(
        prog='image_mod.py',
        description='Extracts images from zip, docx, xlsx, and pptx files',
    )
    parser.add_argument('filepath')
    parser.add_argument('-d', '--destination', default='.')
    args = parser.parse_args()

    source = Path(args.filepath)
    # Path('') is the current directory, so an empty -d value behaves like
    # the default instead of making the script silently do nothing.
    destination = Path(args.destination)

    source_ok = source.is_file()
    destination_ok = destination.is_dir()
    if not (source_ok and destination_ok):
        print('Operation failed. Either the file you provided or the destination directory doesn\'t exist.')
        logging.info('Filename or destination directory doesn\'t exist')
        # Log the results of the checks that were actually performed.
        logging.error('File: {}; Directory: {}'.format(source_ok, destination_ok))
        return

    process = extract_images(source, destination)
    # extract_images returns two items on success, one (the suffix) on failure.
    if len(process) == 2:
        print('Operation completed successfully. {} images were extracted ({:,.2f}KB total)'.format(process[0], process[1] / 1024))
        logging.info('Operation successful')
    else:
        print('Operation failed. File type {} not supported.'.format(process[0]))
        logging.info('Operation failed')
if __name__ == "__main__":
    # Script entry: send DEBUG-and-above records to common_tasks.log,
    # then hand control to the command-line driver.
    logging.basicConfig(
        filename='common_tasks.log',
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s: %(message)s',
    )
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment