Skip to content

Instantly share code, notes, and snippets.

@another-junior-dev
Last active December 4, 2020 04:32
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save another-junior-dev/990a4e622868627cb93be3d8fa2eff04 to your computer and use it in GitHub Desktop.
Save another-junior-dev/990a4e622868627cb93be3d8fa2eff04 to your computer and use it in GitHub Desktop.
A script to extract images from .zip, .docx, .xlsx, and .pptx files
#!/usr/bin/python3
import os
import shutil
import logging
import argparse
import tempfile
from pathlib import Path
from zipfile import ZipFile
IMAGE_EXT = ('png', 'jpeg', 'jpg')


def extract_images(filepath, destination):
    '''Copy every image stored in a zip-based file into a directory.

    Archive members whose names end in one of the IMAGE_EXT extensions
    (compared case-insensitively, with a leading dot) are streamed straight
    from the archive into ``destination``. The folder structure inside the
    archive is flattened: only the last path component of each member is
    used as the output file name.

    Args:
        filepath: Path (or str) of the archive (.zip, .docx, .xlsx, .pptx).
        destination: Path (or str) of an existing directory to write into.

    Returns:
        ``[file_count, overall_size]`` on success, where ``overall_size`` is
        the sum of the uncompressed sizes in bytes, or the one-element list
        ``[suffix]`` (the file's extension) when anything goes wrong.
        Callers distinguish the two cases by the length of the list.
        Files written before a failure are left in place.
    '''
    filepath = Path(filepath)
    destination = Path(destination)

    # IMAGE_EXT holds bare extensions; match on '.ext' so that a name such as
    # 'notapng' is rejected, and lower-case so that 'PHOTO.JPG' is accepted.
    suffixes = tuple('.' + ext.lower() for ext in IMAGE_EXT)

    def is_image(filename):
        return filename.lower().endswith(suffixes)

    def unique_name(basename, taken):
        # Flattening can map several members (a/x.png, b/x.png) to the same
        # output name; add a numeric tag instead of overwriting silently.
        # Names are tracked lower-cased for case-insensitive file systems.
        stem, suffix = Path(basename).stem, Path(basename).suffix
        candidate = basename
        counter = 1
        while candidate.lower() in taken:
            candidate = '{}_{}{}'.format(stem, counter, suffix)
            counter += 1
        taken.add(candidate.lower())
        return candidate

    try:
        with ZipFile(filepath) as working_zip:
            images = [info for info in working_zip.infolist()
                      if not info.is_dir() and is_image(info.filename)]
            file_count = len(images)
            overall_size = sum(info.file_size for info in images)
            logging.info('Extracted {} images'.format(file_count))

            taken = set()
            for info in images:
                # Only the final path component is used, so a crafted member
                # name (absolute or containing '..') cannot read or write
                # outside the archive / destination.
                target = destination / unique_name(Path(info.filename).name, taken)
                with working_zip.open(info) as source, open(target, 'wb') as sink:
                    shutil.copyfileobj(source, sink)
                logging.info('Copied {}'.format(info.filename))

        logging.info('Copied all image files to {}'.format(destination.resolve()))
        return [file_count, overall_size]
    except Exception as e:
        # Deliberately broad: callers rely on a list being returned rather
        # than on an exception, whatever the cause of the failure.
        logging.info('File is a {}'.format(filepath.suffix))
        logging.error('There was an error unzipping the file, make sure it\'s a zipped file (.zip, .docx, .xlsx, .pptx)')
        logging.exception(e)
        # Always a fresh one-element list, even if counting had succeeded.
        return [filepath.suffix]
def main(argv=None):
    '''Parse the command line, run the extraction and report the outcome.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse reads the arguments from ``sys.argv``.

    Returns:
        0 when the images were extracted, 1 when the operation failed.
    '''
    parser = argparse.ArgumentParser(prog='image_mod.py', description='Extracts images from zip, docx, xlsx, and pptx files')
    parser.add_argument('filepath')
    parser.add_argument('-d', '--destination', default='.')
    args = parser.parse_args(argv)

    source = Path(args.filepath)
    # Path('') is the current directory, so an empty -d value behaves like
    # the default instead of making the program exit without doing anything.
    target = Path(args.destination)

    if not (source.is_file() and target.is_dir()):
        print('Operation failed. Either the file you provided or the destination directory doesn\'t exist.')
        logging.info('Filename or destination directory doesn\'t exist')
        logging.error('File: {}; Directory: {}'.format(source.exists(), target.exists()))
        return 1

    process = extract_images(source, target)
    if len(process) == 2:
        # Success: [image count, total size in bytes].
        print('Operation completed successfully. {} images were extracted ({:,.2f}KB total)'.format(process[0], process[1] / 1024))
        logging.info('Operation successful')
        return 0

    # Failure: the file suffix is always the LAST element appended, so read
    # it from the end; the first element may be an image count when the
    # failure happened after counting.
    print('Operation failed. File type {} not supported.'.format(process[-1]))
    logging.info('Operation failed')
    return 1
if __name__ == "__main__":
    # LOCALAPPDATA is only defined on Windows; fall back to a directory in
    # the user's home so the script also starts on other platforms.
    local_app_data = os.environ.get('LOCALAPPDATA')
    if local_app_data:
        logpath = Path(local_app_data) / 'Programs' / 'image_mod'
    else:
        logpath = Path.home() / '.image_mod'
    logfile = 'image_mod.log'
    # Create missing parent directories too, and do not fail when the
    # directory is already there.
    logpath.mkdir(parents=True, exist_ok=True)
    # Configure logging unconditionally, so the very first run (when the
    # directory has just been created) is logged as well.
    logging.basicConfig(filename=logpath / logfile, level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s: %(message)s')
    # Use main()'s return value as the exit status; a None return exits 0.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment