Created
June 6, 2009 00:24
-
-
Save srid/124597 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from os.path import abspath | |
import tarfile | |
import zipfile | |
from pypm.util import console | |
def extract_file(path, to_directory='.'):
    """Extract the compressed file at ``path`` into ``to_directory``.

    The archive handler is chosen by ``CompressedFile.detect`` from the file
    name's extension; the value returned is whatever that handler's
    ``extract_to`` returns.
    """
    return CompressedFile.detect(path).extract_to(to_directory)
def make_tarball(targetfile, container_directory, recursive=True):
    """Make a tarball (type=".tar.gz") of all files under `container_directory`
    and store it as `targetfile`.

    If a regular file already exists at `targetfile` it is handed to
    ``console.rm`` before the new archive is written. Entries are added from
    inside `container_directory`, so member names are relative to it.

    If `recursive` is false, directories found directly under
    `container_directory` are added without their contents.
    """
    # Resolve the target before changing directory; a relative path would
    # otherwise be interpreted relative to `container_directory`.
    targetfile = abspath(targetfile)
    if os.path.isfile(targetfile):
        console.rm(None, targetfile)

    with console.change_directory(container_directory):
        tarball = tarfile.open(targetfile, 'w:gz')
        try:
            for content in os.listdir('.'):
                # Forward the caller's choice instead of always relying on
                # tarfile's default (recursive=True).
                tarball.add(content, recursive=recursive)
        finally:
            tarball.close()
class InvalidFile(Exception):
    """Raised when a compressed file is invalid and cannot be extracted."""
# Names exported by ``from <module> import *``.
__all__ = [
    'extract_file',
    'make_tarball',
]


## -- internal
class CompressedFile(object):
    """Base class for an archive file recognised by its file name extension.

    Subclasses list the extensions they handle in an ``EXTENSIONS`` class
    attribute and implement ``_extract``.
    """

    def __init__(self, filename, ext):
        self.filename = filename
        # The extension that matched (e.g. '.tar.gz'); its number of dots
        # tells `_possible_dir_name` how many suffixes to strip.
        self.ext = ext

    @staticmethod
    def detect(filename):
        """Detect ``filename`` and return the appropriate compressed file
        object

        Raises ``InvalidFile`` if no known extension matches ``filename``.
        """
        implementors = [ZippedFile, GzipTarredFile, Bzip2TarredFile]
        for implementor in implementors:
            for ext in implementor.EXTENSIONS:
                if filename.endswith(ext):
                    return implementor(filename, ext)
        # Call-style raise: valid on both Python 2 and Python 3.
        raise InvalidFile(
            'not a valid compressed file type: {0}'.format(filename))

    def extract_to(self, directory):
        """Extract all files recursively *under* ``directory``

        Return the final extracted directory

        Note that ``_extract`` runs after the working directory has been
        changed to ``directory``, so a relative ``self.filename`` is resolved
        against ``directory`` rather than the caller's original directory.
        """
        with console.change_directory(directory):
            self._extract()
        return self._possible_dir_name()

    def _extract(self):
        """Extract all to current directory"""
        raise NotImplementedError

    def _possible_dir_name(self):
        """The directory where the files are possibly extracted.

        Usually if 'foo.tar.gz' is extracted.. then we believe the files are
        actually in foo/. Is this true for all files of type .tgz/.tbz/.zip? I
        don't know.

        The name is derived from ``self.filename`` by stripping as many
        suffixes as there are dots in the matched extension.

        XXX: verify the correctness of this assumption
        """
        return _splitext(self.filename, self.ext.count('.'))
class ZippedFile(CompressedFile):
    """Handler for ``.zip`` archives."""

    EXTENSIONS = ['.zip']

    def _extract(self):
        """Extract every member of the zip file into the current directory.

        ``zipfile.BadZipfile`` and ``zipfile.LargeZipFile`` errors are
        re-raised as ``InvalidFile``.
        """
        try:
            f = zipfile.ZipFile(self.filename, 'r')
            try:
                f.extractall()
            finally:
                # Always release the file handle, even if extraction fails.
                f.close()
        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
            # `as` and call-style raise work on both Python 2.6+ and 3.
            raise InvalidFile(e)
class TarredFile(CompressedFile):
    """Common extraction logic for tar archives.

    Subclasses provide the ``tarfile.open`` mode via ``_get_mode``.
    """

    def _extract(self):
        """Extract every member of the tarball into the current directory.

        Any ``tarfile.TarError`` is re-raised as ``InvalidFile``.
        """
        try:
            f = tarfile.open(self.filename, self._get_mode())
            try:
                # Fix up member permissions before anything hits the disk.
                self._ensure_read_access(f)
                # NOTE(review): extractall() is applied to the archive as-is;
                # if archives can come from untrusted sources, member paths
                # should be validated first -- confirm against callers.
                f.extractall()
            finally:
                f.close()
        except tarfile.TarError as e:
            # `as` and call-style raise work on both Python 2.6+ and 3.
            raise InvalidFile(e)

    def _get_mode(self):
        """Return the mode for this tarfile"""
        raise NotImplementedError

    def _ensure_read_access(self, tarfileobj):
        """Ensure that the given tarfile will be readable by the user after
        extraction.

        Some tarballs have u-x set on directories. They may as well have u-r set
        on files. We reset such perms here.. so that the extracted files remain
        accessible.

        The mode of each member is adjusted in memory only; the archive on
        disk is not modified.

        See also: http://bugs.python.org/issue6196
        """
        # `0o` octal literals are accepted by Python 2.6+ and Python 3.
        EXECUTE = 0o100
        READ = 0o400

        # Directories need both read (to list) and execute (to traverse);
        # regular files only need read. Execute is deliberately not forced on
        # files so that non-executables do not become executable.
        dir_perm = READ | EXECUTE
        file_perm = READ

        # WARNING: if the tarfile had a huge of list of files, this could be a
        # potential performance bottleneck.
        for tarinfo in tarfileobj.getmembers():
            tarinfo.mode |= (dir_perm if tarinfo.isdir() else file_perm)
class GzipTarredFile(TarredFile):
    """Handler for gzip-compressed tarballs."""

    EXTENSIONS = ['.tar.gz', '.tgz']

    def _get_mode(self):
        """Return the ``tarfile.open`` mode string for gzip'd tarballs."""
        mode = 'r:gz'
        return mode
class Bzip2TarredFile(TarredFile):
    """Handler for bzip2-compressed tarballs."""

    EXTENSIONS = ['.tar.bz2', '.tbz']

    def _get_mode(self):
        """Return the ``tarfile.open`` mode string for bzip2'd tarballs."""
        # 'r:bz2' is tarfile's mode for reading bzip2 compression; the
        # previous value 'r:gz2' is not a mode tarfile recognises.
        return 'r:bz2'
def _splitext(filename, level): | |
"""Similar to os.path.splitext but split ``level`` number of tiles | |
>>> splitext('foo.tar.gz', 2) | |
'foo' | |
>>> splitext('foo.zip', 1) | |
'foo' | |
""" | |
name = filename | |
for level_no in range(level): | |
name = os.path.splitext(name)[0] | |
return name |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks this really helped me today.