Skip to content

Instantly share code, notes, and snippets.

@swaroopjcse
Last active November 20, 2022 14:52
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save swaroopjcse/6d789188a9cdb21d725767716669557f to your computer and use it in GitHub Desktop.
Save swaroopjcse/6d789188a9cdb21d725767716669557f to your computer and use it in GitHub Desktop.
A simple python script to extract the files from a recursive zip archive.
#!/usr/bin/python
"""
A simple python script to extract the files from a recursive zip archive.
This script was developed for the specific purpose of extracting student submissions from a submitted zip bundle,
but it can be modified to suit other requirements too.
Assumed zip structure is:
    main_archive.zip
    |
    |- student1.zip
    |   |- actual_files
    |- student2.zip
    |   |- actual_files
    |- ...
The script extracts the actual files into individual folders named 'student1', 'student2', etc.
Usage: $ python extractlabs.py -i <input_zip_file> -o <output_dir>
@author Swaroop Joshi
@version 20160910
"""
import getopt
import os
import shutil
import sys
import zipfile
def unzip(zip_file, dest_dir, ext_filter=''):
    """
    Extracts the members of a zip file into the given destination folder.

    The destination folder is created if it does not exist yet. Output can be
    filtered by providing a file extension. An archive that is not a valid zip
    file is reported on standard output and otherwise ignored.

    :param zip_file: zip file to extract.
    :param dest_dir: destination folder.
    :param ext_filter: file extension, including the leading dot (e.g. '.py').
        Only members matching this extension are extracted; an empty value
        (the default) extracts every member.
    :return: nothing
    """
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    try:
        # The context manager closes the archive handle even if extraction fails.
        with zipfile.ZipFile(zip_file) as archive:
            for member in archive.namelist():
                ext = os.path.splitext(os.path.basename(member))[1]
                if not ext_filter or ext == ext_filter:
                    # print(...) with a single argument behaves the same on Python 2 and 3.
                    print('Extracting {f} to {d}'.format(f=member, d=dest_dir))
                    archive.extract(member, dest_dir)
    except zipfile.BadZipfile:
        print('Cannot extract {f}: Not a valid zipfile (BadZipfile Exception)'.format(f=zip_file))
def clean(targets):
    """
    Deletes the directories in the given list of targets.

    Targets that do not exist are skipped silently; each removal is reported
    on standard output.

    :param targets: List of targets to delete (relative or absolute paths).
    """
    for target in targets:
        if os.path.exists(target):
            # print(...) with a single argument behaves the same on Python 2 and 3.
            print('Removing ' + target)
            shutil.rmtree(target)
def cmd_error(prog_name):
    """
    Prints out the usage instruction and exits with error status 2.

    :param prog_name: Name of the script to print in the usage prompt
    :raises SystemExit: always, with exit status 2
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Usage: $ python {prog} -i <input_file> -o <output_dir>'.format(prog=prog_name))
    sys.exit(2)
def read_cmd_line(argv):
    """
    Parses the command line and returns the options the script needs.

    The usage text is printed and the script exits (via cmd_error) when the
    arguments cannot be parsed, when no option is given, when -h is given, or
    when either the input file or the output directory is missing.

    :param argv: Command line arguments (including the script name)
    :return: The input zip file and destination directory
    """
    script = argv[0]
    parsed = []
    try:
        parsed, _ = getopt.getopt(argv[1:], 'hi:o:', ['ifile=', 'odir='])
    except getopt.GetoptError:
        cmd_error(script)

    if len(parsed) == 0:
        cmd_error(script)

    source, target = '', ''
    for flag, value in parsed:
        if flag in ('-i', '--ifile'):
            source = value
        elif flag in ('-o', '--odir'):
            target = value
        elif flag == '-h':
            cmd_error(script)

    # Both options are mandatory.
    if not (source and target):
        cmd_error(script)
    return source, target
def main(argv):
    """
    Extracts a zip bundle of zip files into one folder per inner archive.

    The destination directory is removed first if it already exists, then the
    main archive is extracted into it, and finally every file found directly in
    the destination is extracted into a sibling folder named after the file
    without its extension.

    :param argv: Command line arguments (including the script name)
    """
    zip_file, dest_dir = read_cmd_line(argv)

    # Validate the input before the destructive clean-up below, so a mistyped
    # input path does not wipe an existing output directory for nothing.
    if not os.path.isfile(zip_file):
        sys.exit('Input file not found: {f}'.format(f=zip_file))

    # If the dest_dir exists, remove it
    clean([dest_dir])

    # Unzip the main file first
    unzip(zip_file, dest_dir)

    # Then loop over each entry in the dest_dir and unzip its contents locally
    for entry in os.listdir(dest_dir):
        entry_path = os.path.join(dest_dir, entry)
        # Folders extracted from the main archive are not archives themselves;
        # trying to open one as a zip file fails with an error unzip does not catch.
        if not os.path.isfile(entry_path):
            continue
        # Get the base name of the individual zip file without the extension
        base = os.path.splitext(entry)[0]
        unzip(zip_file=entry_path, dest_dir=os.path.join(dest_dir, base))
if __name__ == '__main__':
    # Run the extraction only when executed as a script, not when imported.
    main(argv=sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment