Skip to content

Instantly share code, notes, and snippets.

@antlauzon
Created November 29, 2018 03:02
Show Gist options
  • Save antlauzon/213c75d8c0334faccc77a9f47ed882c5 to your computer and use it in GitHub Desktop.
Save antlauzon/213c75d8c0334faccc77a9f47ed882c5 to your computer and use it in GitHub Desktop.
Recursively Unpack Archives
#!/usr/bin/env python3
import glob
import os
import logging
import multiprocessing
import re
import shutil
import sys
from pyunpack import Archive
logging.basicConfig(level=logging.WARNING)

# File-name suffixes treated as archives.  Matching is case-sensitive, so
# ".Z" only matches an upper-case suffix.
_ARCHIVE_EXTENSIONS = (
    ".7z", ".ace", ".alz", ".a", ".arc", ".arj", ".tar.bz2", ".tar.gz",
    ".tbz2", ".tgz", ".bz2", ".cab", ".Z", ".cpio", ".deb", ".dms", ".gz",
    ".lrz", ".lha", ".lzh", ".lz", ".lzma", ".lzo", ".rpm", ".rar", ".rz",
    ".tar", ".xz", ".zip", ".jar", ".zoo",
)

# Matches a path that ENDS with one of the suffixes above.  The trailing "$"
# matters because callers use ARCHIVE_RE.match(): without it, ".a" would
# match "movie.avi" or any path that merely contains ".a" somewhere.
ARCHIVE_RE = re.compile(
    r".*(" + "|".join(re.escape(ext) for ext in _ARCHIVE_EXTENSIONS) + r")$"
)

# Destination roots for archives that were extracted successfully and for
# archives whose extraction failed, relative to the working directory.
ARCHIVE_SINK_UNPACKED = "./unpack_success"
ARCHIVE_SINK_ERROR = "./unpack_error"

# Both sinks are created when the module is loaded.
os.makedirs(ARCHIVE_SINK_UNPACKED, exist_ok=True)
os.makedirs(ARCHIVE_SINK_ERROR, exist_ok=True)
def get_archives(path):
    """Lazily yield archive files found anywhere below *path*.

    Candidates come from the recursive glob ``<path>/**/*.*``.  A candidate
    is yielded only if ARCHIVE_RE matches it and it is a regular file at the
    moment it is reached.

    Args:
        path: Directory to search, as a string.

    Yields:
        The path string of each matching file, as returned by glob.
    """
    pattern = path + '/**/*.*'
    for candidate in glob.glob(pattern, recursive=True):
        if not ARCHIVE_RE.match(candidate):
            continue
        # Checked per item while iterating, not up front for the whole list.
        if not os.path.isfile(candidate):
            continue
        yield candidate
def move_archive(archive, path):
    """Move *archive* into the directory *path*, replacing a same-named file.

    Args:
        archive: Path of the file to move.
        path: Destination directory; created (with parents) if missing.

    Returns:
        None.
    """
    target = "/".join((path, os.path.basename(archive)))

    # shutil.move will not overwrite an existing entry inside a destination
    # directory, so a leftover from an earlier run is deleted first.
    already_there = os.path.exists(target)
    if already_there:
        os.remove(target)

    os.makedirs(path, exist_ok=True)
    shutil.move(archive, path)
def unpack(archive):
    """Extract *archive* beside itself, file it into a sink, and recurse.

    The contents are extracted into a ``_unpack`` directory next to the
    archive.  On success the archive is moved under ARCHIVE_SINK_UNPACKED
    and every archive found inside the extraction directory is unpacked in
    turn.  On failure the archive is moved under ARCHIVE_SINK_ERROR and
    processing of this archive stops.  In both cases the archive's original
    directory is appended to the sink path.

    Args:
        archive: Path of the archive file to extract.

    Returns:
        None.
    """
    archive_dir = os.path.dirname(archive)
    # os.path.join keeps a bare file name (empty dirname) relative to the
    # working directory; "{}/{}".format("", "_unpack") would instead point
    # at "/_unpack" in the filesystem root.
    archive_unpack_dir = os.path.join(archive_dir, '_unpack')
    os.makedirs(archive_unpack_dir, exist_ok=True)

    logging.info("UNARCHIVING: %s", archive)
    try:
        Archive(archive).extractall(archive_unpack_dir)
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.  Logged at WARNING with the traceback so the failure
        # and its cause are visible at the configured log level, and logged
        # before the move so it is recorded even if the move itself fails.
        logging.warning("UNARCHIVE_FAILURE: %s", archive, exc_info=True)
        # Plain concatenation on purpose: os.path.join would discard the
        # sink prefix whenever archive_dir is an absolute path.
        move_archive(archive, "{}/{}".format(ARCHIVE_SINK_ERROR, archive_dir))
        return

    logging.info("UNARCHIVE_SUCCESS: %s", archive)
    move_archive(archive, "{}/{}".format(ARCHIVE_SINK_UNPACKED, archive_dir))

    # Archives that were inside this one get the same treatment.
    for sub_archive in get_archives(archive_unpack_dir):
        unpack(sub_archive)
def main():
    """Unpack every archive below the directory given as the first argument.

    Archives found by get_archives are distributed over a pool of 16 worker
    processes, each running unpack on one archive at a time.

    Raises:
        SystemExit: With a usage message if no directory argument was given.
    """
    # Validate before starting the pool so a missing argument produces a
    # usage message instead of an IndexError after workers were spawned.
    if len(sys.argv) < 2:
        sys.exit("usage: {} DIRECTORY".format(sys.argv[0]))

    with multiprocessing.Pool(16) as pool:
        pool.map(unpack, get_archives(sys.argv[1]))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment