Skip to content

Instantly share code, notes, and snippets.

@srid
Created June 6, 2009 00:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save srid/124597 to your computer and use it in GitHub Desktop.
Save srid/124597 to your computer and use it in GitHub Desktop.
import os
from os.path import abspath
import tarfile
import zipfile
from pypm.util import console
def extract_file(path, to_directory='.'):
    """Extract the compressed file at ``path`` under ``to_directory``.

    The archive handler is picked by ``CompressedFile.detect`` from the
    file name's extension; an unrecognised extension makes ``detect`` raise
    ``InvalidFile``.

    Return the value of the handler's ``extract_to``: the guessed name of
    the directory holding the extracted files.
    """
    return CompressedFile.detect(path).extract_to(to_directory)
def make_tarball(targetfile, container_directory, recursive=True):
    """Make a gzipped tarball (".tar.gz") of the entries in `container_directory`.

    The archive is written to `targetfile`; if a regular file already exists
    at that path it is removed first.

    `recursive` is forwarded to ``TarFile.add``: when true (the default)
    each directory is added together with everything below it; when false
    only the top-level entries themselves are added.
    """
    # Resolve to an absolute path up front.  `console.change_directory` is
    # presumably a chdir-style context manager, in which case a relative
    # `targetfile` would otherwise end up inside `container_directory`.
    targetfile = abspath(targetfile)
    if os.path.isfile(targetfile):
        console.rm(None, targetfile)

    with console.change_directory(container_directory):
        tarball = tarfile.open(targetfile, 'w:gz')
        try:
            # Entries are added by their bare names as listed from '.'.
            for content in os.listdir('.'):
                tarball.add(content, recursive=recursive)
        finally:
            tarball.close()
class InvalidFile(Exception):
    """Raised when a compressed file is invalid and cannot be extracted."""
# Public API.  InvalidFile is exported as well because the extraction code
# raises it to callers, who need the name in order to catch it.
__all__ = ['extract_file', 'make_tarball', 'InvalidFile']
## -- internal
class CompressedFile(object):
    """Base class for an archive recognised by its file name extension.

    Concrete subclasses list the extensions they handle in ``EXTENSIONS``
    and implement ``_extract``.
    """

    def __init__(self, filename, ext):
        # ``ext`` is the extension that matched ``filename`` (for example
        # '.tar.gz'); its dot count later tells how many suffixes to strip.
        self.filename = filename
        self.ext = ext

    @staticmethod
    def detect(filename):
        """Detect ``filename`` and return the appropriate compressed file
        object.

        Handlers are tried in a fixed order and the first whose extension
        ends ``filename`` wins.  Raise ``InvalidFile`` when none matches.
        """
        implementors = [ZippedFile, GzipTarredFile, Bzip2TarredFile]
        for implementor in implementors:
            for ext in implementor.EXTENSIONS:
                if filename.endswith(ext):
                    return implementor(filename, ext)
        # Call form of ``raise`` is valid on both Python 2 and Python 3,
        # unlike the ``raise Class, value`` statement.
        raise InvalidFile(
            'not a valid compressed file type: {0}'.format(filename))

    def extract_to(self, directory):
        """Extract all files recursively *under* ``directory``.

        Return the final extracted directory, as guessed by
        ``_possible_dir_name``.
        """
        # NOTE(review): ``self.filename`` is used as given after the
        # directory change; a relative path would presumably be resolved
        # against ``directory`` -- confirm callers pass absolute paths.
        with console.change_directory(directory):
            self._extract()
        return self._possible_dir_name()

    def _extract(self):
        """Extract all to current directory (implemented by subclasses)."""
        raise NotImplementedError

    def _possible_dir_name(self):
        """The directory where the files are possibly extracted.

        Usually if 'foo.tar.gz' is extracted.. then we believe the files are
        actually in foo/.  Is this true for all files of type
        .tgz/.tbz/.zip?  I don't know.

        XXX: verify the correctness of this assumption
        """
        # Strip one suffix per dot in the matched extension:
        # '.tar.gz' -> 2, '.tgz' -> 1.
        return _splitext(self.filename, self.ext.count('.'))
class ZippedFile(CompressedFile):
    """Zip archive (``.zip``), extracted with the ``zipfile`` module."""

    EXTENSIONS = ['.zip']

    def _extract(self):
        """Extract every member into the current directory.

        Raise ``InvalidFile`` when ``zipfile`` reports the archive as bad,
        or as requiring ZIP64 support that is not enabled.
        """
        try:
            f = zipfile.ZipFile(self.filename, 'r')
            try:
                f.extractall()
            finally:
                f.close()
        # ``except ... as e`` and the call form of ``raise`` work on both
        # Python 2.6+ and Python 3.
        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
            raise InvalidFile(e)
class TarredFile(CompressedFile):
    """Base class for tar archives; subclasses supply the open mode."""

    def _extract(self):
        """Extract every member into the current directory.

        Raise ``InvalidFile`` when ``tarfile`` cannot read the archive.
        """
        try:
            f = tarfile.open(self.filename, self._get_mode())
            try:
                # Fix member permissions first so that what gets written
                # to disk is accessible to the extracting user.
                self._ensure_read_access(f)
                f.extractall()
            finally:
                f.close()
        # ``except ... as e`` and the call form of ``raise`` work on both
        # Python 2.6+ and Python 3.
        except tarfile.TarError as e:
            raise InvalidFile(e)

    def _get_mode(self):
        """Return the mode for this tarfile"""
        raise NotImplementedError

    def _ensure_read_access(self, tarfileobj):
        """Ensure that the given tarfile will be readable by the user after
        extraction.

        Some tarballs have u-x set on directories. They may as well have u-r
        set on files. We reset such perms here.. so that the extracted files
        remain accessible.

        Directories gain owner read and execute (both are needed to list and
        enter them); files gain owner read only.  The bits are OR-ed in, so
        permissions already present in the archive are never removed.

        See also: http://bugs.python.org/issue6196
        """
        # ``0o`` octal literals are accepted by Python 2.6+ and Python 3.
        EXECUTE = 0o100
        READ = 0o400
        dir_perm = READ | EXECUTE
        file_perm = READ

        # WARNING: if the tarfile had a huge of list of files, this could be
        # a potential performance bottleneck.
        for tarinfo in tarfileobj.getmembers():
            tarinfo.mode |= (dir_perm if tarinfo.isdir() else file_perm)
class GzipTarredFile(TarredFile):
    """Gzip-compressed tar archive (``.tar.gz`` / ``.tgz``)."""

    EXTENSIONS = ['.tar.gz', '.tgz']

    def _get_mode(self):
        """Return the ``tarfile.open`` mode string for reading gzip."""
        return 'r:gz'
class Bzip2TarredFile(TarredFile):
    """Bzip2-compressed tar archive (``.tar.bz2`` / ``.tbz``)."""

    EXTENSIONS = ['.tar.bz2', '.tbz']

    def _get_mode(self):
        """Return the ``tarfile.open`` mode string for reading bzip2."""
        # 'r:bz2' is tarfile's bzip2 read mode.  The previous value,
        # 'r:gz2', names no known compression type, so every bzip2
        # tarball was rejected.
        return 'r:bz2'
def _splitext(filename, level):
    """Similar to os.path.splitext but strip ``level`` extensions.

    Return only the remaining name, not a ``(root, ext)`` pair.

    >>> _splitext('foo.tar.gz', 2)
    'foo'
    >>> _splitext('foo.tar.gz', 1)
    'foo.tar'
    >>> _splitext('foo.zip', 1)
    'foo'
    """
    name = filename
    for _ in range(level):
        # Each pass drops the last extension; a name without one is
        # returned unchanged by os.path.splitext.
        name = os.path.splitext(name)[0]
    return name
@rompic
Copy link

rompic commented Feb 14, 2019

Thanks this really helped me today.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment