Skip to content

Instantly share code, notes, and snippets.

@jmealo
Last active December 21, 2015 06:28
Show Gist options
  • Save jmealo/6264077 to your computer and use it in GitHub Desktop.
Save jmealo/6264077 to your computer and use it in GitHub Desktop.
Extracts .tar, .tar.gz, .tar.bz2 and .zip files with an optional progress bar. Raises ExtractException when it encounters an error.
import os, re, commands, subprocess
def streamCommand(cmd):
    """Launch ``cmd`` through the shell and hand back its output stream.

    The child's stderr is redirected into its stdout, so looping over the
    returned file object yields every line the command prints.
    """
    popen_options = {
        "shell": True,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.STDOUT,
    }
    process = subprocess.Popen(cmd, **popen_options)
    return process.stdout
def getFileExtension(filename):
    """Return the extension of ``filename`` without the leading dot.

    Two-part tar extensions are kept together, so ``backup.tar.gz`` gives
    ``tar.gz`` rather than ``gz``. The ``tar`` component is recognised in any
    letter case; the final component is returned with its original case.

    Only the last path component is inspected, so dots in parent directory
    names are ignored. A name that contains no dot has no extension and
    yields an empty string.
    """
    # work on the file name only; a dotted directory is not an extension
    parts = os.path.basename(filename).split('.')
    if len(parts) < 2:
        # no dot at all: nothing to report (previously an IndexError)
        return ""
    if parts[-2].lower() == 'tar':
        return "tar." + parts[-1]
    return parts[-1]
if "ExtractException" not in globals():
    # only define the exception when the module does not already provide it
    class ExtractException(Exception):
        """Raised when an archive cannot be listed or extracted."""


# matches the per-entry lines unzip prints, e.g. "  inflating: dir/file";
# the "Archive: ..." header line does not match
_UNZIP_MEMBER_LINE = re.compile(br'^\s*[a-z]+ing: ')


def _toText(data):
    """Return subprocess output as a native string usable in messages."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def _spawn(args):
    """Start ``args`` without a shell, stderr merged into the stdout pipe."""
    try:
        return subprocess.Popen(args, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
    except OSError as err:
        # typically the tar/unzip executable is missing
        raise ExtractException("Unable to run %s: %s" % (args[0], err))


def _countMembers(src, file_type):
    """Return the number of entries the archive ``src`` reports."""
    if file_type == 'zip':
        lister = _spawn(["unzip", "-l", src])
    else:
        lister = _spawn(["tar", "-tf", src])
    listing = lister.communicate()[0]
    if lister.returncode != 0:
        raise ExtractException(_toText(listing).strip())
    if file_type != 'zip':
        # tar -t prints one entry per line
        return len(listing.splitlines())
    # the entry total is on the last line of unzip -l output
    # ex: 196727429 4395 files   (or "1 file" for a single entry)
    total = re.search(br'(\d+)\s+files?\s*$', listing)
    if total is None:
        raise ExtractException(
            "Unable to determine the number of files in %s" % src)
    return int(total.group(1))


def extractArchive(src, progress=True, dst=False):
    """Extract a .tar, .tar.gz, .tar.bz2 or .zip archive with tar/unzip.

    The external tools are started with argument lists rather than a shell
    command line, so paths containing spaces or shell metacharacters are
    passed through unchanged.

    Args:
        src: path of the archive; its extension selects the tool and flags.
        progress: when true, print a running "Extracted N files [P%]" line
            to stdout while the tool runs.
        dst: directory to extract into. Any false value (the default) leaves
            the destination up to the tool.

    Returns:
        Without progress: True once the tool exits with status 0.
        With progress: whether the number of extracted entries seen in the
        tool's output equals the number of entries listed in the archive.

    Raises:
        ExtractException: unsupported extension, missing destination
            directory, the tool could not be started, or the tool exited
            with a non-zero status.
    """
    import sys  # local import: only needed for the progress output

    file_type = getFileExtension(src).lower()
    extract_commands = {
        "tar": ["tar", "-vxf"],
        "tar.gz": ["tar", "-vzxf"],
        "tar.bz2": ["tar", "-xvjf"],
        "zip": ["unzip", "-o"],
    }
    if file_type not in extract_commands:
        # we can't extract this type of file; give up
        raise ExtractException(file_type + " is an unsupported file type.")
    # the archive name must directly follow the flags (tar's -f takes it)
    args = extract_commands[file_type] + [src]

    # if a destination directory is specified append the necessary flag
    if dst:
        # make sure that the path actually exists
        if not os.path.exists(dst):
            raise ExtractException(
                "Destination directory %s doesn't exist" % dst)
        args += ["-d" if file_type == 'zip' else "-C", dst]

    if not progress:
        # extract without progress, use exit status instead of file count
        # to determine whether extraction was successful
        extractor = _spawn(args)
        output = extractor.communicate()[0]
        if extractor.returncode != 0:
            raise ExtractException(_toText(output).strip())
        return True

    # output progress to the terminal
    start_line = "Extracting %s" % os.path.basename(src)
    if dst:
        start_line += " to %s" % dst
    sys.stdout.write(start_line + "\n")

    file_count = _countMembers(src, file_type)
    # pad the counter to the width of the total so the line keeps its length
    template = "Extracted %" + str(len(str(file_count))) + "d files [%3.2f%%]"

    extractor = _spawn(args)
    extracted = 0
    last_line = b''
    # readline-based iteration hands over each line as soon as it is written
    for line in iter(extractor.stdout.readline, b''):
        last_line = line
        if file_type == 'zip' and not _UNZIP_MEMBER_LINE.match(line):
            # header or other non-entry output from unzip
            continue
        extracted += 1
        # guard the division: an empty listing must not abort the extraction
        percent = extracted * 100. / file_count if file_count else 100.
        # carriage return rewinds the cursor so the next status overwrites
        sys.stdout.write("\r" + template % (extracted, percent))
        sys.stdout.flush()
    extractor.stdout.close()
    status = extractor.wait()
    # reset cursor to the next line
    sys.stdout.write("\n")
    if status != 0:
        raise ExtractException(_toText(last_line).strip())
    return extracted == file_count
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment