Last active
November 20, 2022 14:52
-
-
Save swaroopjcse/6d789188a9cdb21d725767716669557f to your computer and use it in GitHub Desktop.
A simple python script to extract the files from a recursive zip archive.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""
A simple python script to extract the files from a recursive zip archive.
This script was developed for the specific use of extracting student submissions from a submitted zip bundle,
but can be modified to suit other requirements too.

Assumed zip structure is:
main_archive.zip
|
|- student1.zip
|    |- actual_files
|- student2.zip
|    |- actual_files
|- ...

The script extracts the actual files into individual folders named 'student1', 'student2', etc.

Usage: $ python extractlabs.py -i <input_zip_file> -o <output_dir>

@author Swaroop Joshi
@version 20160910
"""
import getopt | |
import os | |
import shutil | |
import sys | |
import zipfile | |
def unzip(zip_file, dest_dir, ext_filter=''):
    """
    Unzips the files from a zip file and puts them in the given destination.
    Output can be filtered by providing the file extension.

    :param zip_file: zip file to extract.
    :param dest_dir: destination folder; it is created if it does not exist yet.
    :param ext_filter: file extension, including the leading dot (e.g. '.py'), because it is
        compared against the result of os.path.splitext. Only the files matching this extension
        will be extracted. An empty value extracts every member.
    :return: nothing. An invalid archive is reported on stdout instead of being raised.
    """
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    try:
        # The context manager closes the archive handle even when extraction fails part-way.
        with zipfile.ZipFile(zip_file) as zfile:
            for filename in zfile.namelist():
                ext = os.path.splitext(os.path.basename(filename))[1]
                if not ext_filter or ext == ext_filter:
                    print('Extracting {f} to {d}'.format(f=filename, d=dest_dir))
                    zfile.extract(filename, dest_dir)
    except zipfile.BadZipfile:
        print('Cannot extract {f}: Not a valid zipfile (BadZipfile Exception)'.format(f=zip_file))
def clean(targets):
    """
    Deletes the given targets from disk.
    Directories are removed recursively; regular files and symbolic links are unlinked.
    Targets that do not exist are skipped silently.

    :param targets: List of targets to delete (relative or absolute paths).
    """
    for target in targets:
        # lexists (unlike exists) is also true for dangling symbolic links.
        if not os.path.lexists(target):
            continue
        print('Removing ' + target)
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target)
        else:
            # shutil.rmtree raises on plain files and on symbolic links, so unlink those instead.
            os.remove(target)
def cmd_error(prog_name):
    """
    Prints out the usage instruction on stderr and exits with error status 2.

    :param prog_name: Name of the script to print in the usage prompt
    :raises SystemExit: always, with exit code 2
    """
    # The message accompanies a non-zero exit status, so it goes to stderr rather than stdout.
    sys.stderr.write('Usage: $ python {prog} -i <input_file> -o <output_dir>\n'.format(prog=prog_name))
    sys.exit(2)
def read_cmd_line(argv):
    """
    Parses the command line and returns the input archive and the output directory.

    Recognised options are -i/--ifile (input zip file), -o/--odir (output directory) and -h.
    When an option is repeated, the last occurrence wins. Positional arguments left over
    after option parsing are ignored. Any parse error, the -h flag, or a missing input or
    output value is handed to cmd_error with the script name.

    :param argv: Command line arguments (including the script name)
    :return: The input zip file and destination directory
    """
    script = argv[0]
    parsed = []
    try:
        parsed, _ = getopt.getopt(argv[1:], 'hi:o:', ['ifile=', 'odir='])
    except getopt.GetoptError:
        cmd_error(script)
    if len(parsed) == 0:
        cmd_error(script)

    source, target = '', ''
    for flag, value in parsed:
        if flag == '-h':
            cmd_error(script)
        elif flag in ('-i', '--ifile'):
            source = value
        elif flag in ('-o', '--odir'):
            target = value

    # Both values are mandatory.
    if not (source and target):
        cmd_error(script)
    return source, target
def main(argv):
    """
    Extracts the main archive into the output directory, then extracts every file found
    there into a sibling folder named after that file (without its extension).

    The output directory is deleted first if it already exists.

    :param argv: Command line arguments (including the script name)
    """
    zip_file, dest_dir = read_cmd_line(argv)

    # clean() below wipes dest_dir, so refuse to run when the input archive is the
    # output path itself or lives anywhere inside it.
    zip_path = os.path.abspath(zip_file)
    dest_root = os.path.join(os.path.abspath(dest_dir), '')
    if (zip_path + os.sep).startswith(dest_root):
        sys.stderr.write(
            'Refusing to run: input file {f} is inside output directory {d}, '
            'which is deleted before extraction\n'.format(f=zip_file, d=dest_dir))
        sys.exit(1)

    # If the dest_dir exists, remove it
    clean([dest_dir])

    # Unzip the main file first
    unzip(zip_file, dest_dir)

    # Then loop over each file in the dest_dir and unzip its contents locally
    for entry in os.listdir(dest_dir):
        entry_path = os.path.join(dest_dir, entry)
        # Folders that came out of the main archive are not archives themselves;
        # opening one as a zip file raises an error that unzip() does not catch.
        if os.path.isdir(entry_path):
            continue
        # Get the base name of the individual zip file without the extension
        base = os.path.splitext(os.path.basename(entry))[0]
        unzip(zip_file=entry_path, dest_dir=os.path.join(dest_dir, base))
if __name__ == '__main__':
    # Run only when executed as a script; main() receives the full argument vector,
    # script name included.
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment