# srid/compression.py

## compression.py
import os
from os.path import abspath
import tarfile
import zipfile

from pypm.util import console


def extract_file(path, to_directory='.'):
    """Extract the archive at ``path`` beneath ``to_directory``.

    The archive type is chosen from the file name by
    ``CompressedFile.detect`` (which raises ``InvalidFile`` for an unknown
    extension). The return value is whatever ``extract_to`` returns for the
    detected archive object.
    """
    return CompressedFile.detect(path).extract_to(to_directory)

def make_tarball(targetfile, container_directory, recursive=True):
    """Make a tarball (type=".tar.gz") of all files under `container_directory`
    and store it as `targetfile`.

    Every entry listed in `container_directory` is added under its own name,
    so the archive does not contain the container directory itself.

    If `recursive` is false, directories are added without their contents
    (this is forwarded to ``tarfile.TarFile.add``).
    """
    # Resolve the target before changing directory; a relative path would
    # otherwise presumably be interpreted relative to `container_directory`.
    targetfile = abspath(targetfile)
    if os.path.isfile(targetfile):
        # Presumably removes the stale archive -- see console.rm.
        console.rm(None, targetfile)

    with console.change_directory(container_directory):
        tarball = tarfile.open(targetfile, 'w:gz')
        try:
            for content in os.listdir('.'):
                # Honor the caller's `recursive` flag (it used to be ignored).
                tarball.add(content, recursive=recursive)
        finally:
            tarball.close()

class InvalidFile(Exception):
    """Raised when a compressed file is invalid and cannot be extracted."""
    pass

# InvalidFile is exported too: extract_file() raises it for unsupported file
# names, so ``import *`` users need the name in order to catch it.
__all__ = ['extract_file', 'make_tarball', 'InvalidFile']


## -- internal

class CompressedFile(object):
    """Base class for an archive recognised by its file name extension.

    Subclasses declare an ``EXTENSIONS`` list and implement ``_extract``.
    """

    def __init__(self, filename, ext):
        # ``ext`` is the entry of the implementor's EXTENSIONS that matched
        # ``filename`` (see ``detect``).
        self.filename = filename
        self.ext = ext

    @staticmethod
    def detect(filename):
        """Detect ``filename`` and return the appropriate compressed file
        object.

        Detection looks only at the end of the name (case-sensitively).
        Raise ``InvalidFile`` if no known extension matches.
        """
        implementors = [ZippedFile, GzipTarredFile, Bzip2TarredFile]
        for implementor in implementors:
            for ext in implementor.EXTENSIONS:
                if filename.endswith(ext):
                    return implementor(filename, ext)
        # Call-style raise: valid on Python 2 and Python 3 alike.
        raise InvalidFile(
            'not a valid compressed file type: {0}'.format(filename))

    def extract_to(self, directory):
        """Extract all files recursively *under* ``directory``

        Return the final extracted directory, as guessed by
        ``_possible_dir_name``. That guess is derived from ``self.filename``
        alone; it is not joined with ``directory``.

        NOTE(review): ``_extract`` runs after changing into ``directory``, so
        a relative ``self.filename`` is presumably resolved against
        ``directory`` rather than the caller's working directory -- confirm
        what callers expect before changing this.
        """
        with console.change_directory(directory):
            self._extract()
            return self._possible_dir_name()

    def _extract(self):
        """Extract all to current directory"""
        raise NotImplementedError

    def _possible_dir_name(self):
        """The directory where the files are possibly extracted.

        Usually if 'foo.tar.gz' is extracted.. then we believe the files are
        actually in foo/. Is this true for all files of type .tgz/.tbz/.zip? I
        don't know.

        XXX: verify the correctness of this assumption
        """
        # Strip one suffix per dot in the matched extension
        # ('.tar.gz' -> 2, '.zip' -> 1).
        return _splitext(self.filename, self.ext.count('.'))

class ZippedFile(CompressedFile):
    """A ``.zip`` archive."""

    EXTENSIONS = ['.zip']

    def _extract(self):
        """Extract every member of the zip file into the current directory.

        Raise ``InvalidFile`` when ``zipfile`` raises ``BadZipfile`` or
        ``LargeZipFile``.
        """
        try:
            f = zipfile.ZipFile(self.filename, 'r')
            try:
                f.extractall()
            finally:
                f.close()
        # ``as`` binding and call-style raise work on Python 2.6+ and 3.
        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
            raise InvalidFile(e)

class TarredFile(CompressedFile):
    """Shared extraction logic for tar archives.

    Subclasses supply the ``tarfile.open`` mode through ``_get_mode``.
    """

    def _extract(self):
        """Extract every member of the tarball into the current directory.

        Raise ``InvalidFile`` on any ``tarfile.TarError``.

        NOTE(review): ``extractall`` is called without any member path
        checks -- confirm archives come from a trusted source.
        """
        try:
            f = tarfile.open(self.filename, self._get_mode())
            try:
                self._ensure_read_access(f)
                f.extractall()
            finally:
                f.close()
        # ``as`` binding and call-style raise work on Python 2.6+ and 3.
        except tarfile.TarError as e:
            raise InvalidFile(e)

    def _get_mode(self):
        """Return the mode for this tarfile"""
        raise NotImplementedError

    def _ensure_read_access(self, tarfileobj):
        """Ensure that the given tarfile will be readable by the user after
        extraction.

        Some tarballs have u-x set on directories. They may as well have u-r set
        on files. We reset such perms here.. so that the extracted files remain
        accessible. Owner bits are only ever added (OR-ed in), never cleared.

        See also: http://bugs.python.org/issue6196
        """
        EXECUTE = 0o100  # u+x
        READ = 0o400     # u+r
        # A directory needs both bits to be listed and entered. A regular file
        # only needs to be readable; it must not be made executable.
        dir_perm = READ | EXECUTE
        file_perm = READ

        # WARNING: if the tarfile has a huge list of files, this could be a
        # potential performance bottleneck.
        for tarinfo in tarfileobj.getmembers():
            tarinfo.mode |= (dir_perm if tarinfo.isdir() else file_perm)


class GzipTarredFile(TarredFile):
    """Tar archive compressed with gzip."""

    EXTENSIONS = ['.tar.gz', '.tgz']

    def _get_mode(self):
        """Return the ``tarfile.open`` mode for reading gzip data."""
        return 'r:gz'

class Bzip2TarredFile(TarredFile):
    """Tar archive compressed with bzip2."""

    EXTENSIONS = ['.tar.bz2', '.tbz']

    def _get_mode(self):
        """Return the ``tarfile.open`` mode for reading bzip2 data."""
        # 'r:gz2' is not a mode tarfile knows; bzip2 archives use 'r:bz2'.
        return 'r:bz2'


def _splitext(filename, level):
    """Similar to os.path.splitext but split ``level`` number of tiles

    >>> splitext('foo.tar.gz', 2)
    'foo'
    >>> splitext('foo.zip', 1)
    'foo'
    """
    name = filename
    for level_no in range(level):
        name = os.path.splitext(name)[0]
    return name
	import os
	from os.path import abspath
	import tarfile
	import zipfile

	from pypm.util import console


	def extract_file(path, to_directory='.'):
	    """Extract the archive at ``path`` beneath ``to_directory``.

	    The archive type is chosen from the file name by
	    ``CompressedFile.detect`` (which raises ``InvalidFile`` for an unknown
	    extension). The return value is whatever ``extract_to`` returns for
	    the detected archive object.
	    """
	    compressedobj = CompressedFile.detect(path)
	    return compressedobj.extract_to(to_directory)

	def make_tarball(targetfile, container_directory, recursive=True):
	    """Make a tarball (type=".tar.gz") of all files under
	    `container_directory` and store it as `targetfile`.

	    Every entry listed in `container_directory` is added under its own
	    name, so the archive does not contain the container directory itself.

	    If `recursive` is false, directories are added without their contents
	    (this is forwarded to ``tarfile.TarFile.add``).
	    """
	    # Resolve the target before changing directory; a relative path would
	    # otherwise presumably be interpreted relative to
	    # `container_directory`.
	    targetfile = abspath(targetfile)
	    if os.path.isfile(targetfile):
	        # Presumably removes the stale archive -- see console.rm.
	        console.rm(None, targetfile)

	    with console.change_directory(container_directory):
	        tarball = tarfile.open(targetfile, 'w:gz')
	        try:
	            for content in os.listdir('.'):
	                # Honor the caller's `recursive` flag (it used to be
	                # ignored).
	                tarball.add(content, recursive=recursive)
	        finally:
	            tarball.close()

	class InvalidFile(Exception):
	    """The given compressed file is invalid. It cannot be extracted"""

	# InvalidFile is exported too: extract_file() raises it for unsupported file
	# names, so ``import *`` users need the name in order to catch it.
	__all__ = ['extract_file', 'make_tarball', 'InvalidFile']


	## -- internal

	class CompressedFile(object):
	    """Base class for an archive recognised by its file name extension.

	    Subclasses declare an ``EXTENSIONS`` list and implement ``_extract``.
	    """

	    def __init__(self, filename, ext):
	        # ``ext`` is the entry of the implementor's EXTENSIONS that matched
	        # ``filename`` (see ``detect``).
	        self.filename = filename
	        self.ext = ext

	    @staticmethod
	    def detect(filename):
	        """Detect ``filename`` and return the appropriate compressed file
	        object.

	        Detection looks only at the end of the name (case-sensitively).
	        Raise ``InvalidFile`` if no known extension matches.
	        """
	        implementors = [ZippedFile, GzipTarredFile, Bzip2TarredFile]
	        for implementor in implementors:
	            for ext in implementor.EXTENSIONS:
	                if filename.endswith(ext):
	                    return implementor(filename, ext)
	        # Call-style raise: valid on Python 2 and Python 3 alike.
	        raise InvalidFile(
	            'not a valid compressed file type: {0}'.format(filename))

	    def extract_to(self, directory):
	        """Extract all files recursively *under* ``directory``

	        Return the final extracted directory, as guessed by
	        ``_possible_dir_name``. That guess is derived from
	        ``self.filename`` alone; it is not joined with ``directory``.

	        NOTE(review): ``_extract`` runs after changing into ``directory``,
	        so a relative ``self.filename`` is presumably resolved against
	        ``directory`` rather than the caller's working directory --
	        confirm what callers expect before changing this.
	        """
	        with console.change_directory(directory):
	            self._extract()
	            return self._possible_dir_name()

	    def _extract(self):
	        """Extract all to current directory"""
	        raise NotImplementedError

	    def _possible_dir_name(self):
	        """The directory where the files are possibly extracted.

	        Usually if 'foo.tar.gz' is extracted.. then we believe the files
	        are actually in foo/. Is this true for all files of type
	        .tgz/.tbz/.zip? I don't know.

	        XXX: verify the correctness of this assumption
	        """
	        # Strip one suffix per dot in the matched extension
	        # ('.tar.gz' -> 2, '.zip' -> 1).
	        return _splitext(self.filename, self.ext.count('.'))

	class ZippedFile(CompressedFile):
	    """A ``.zip`` archive."""

	    EXTENSIONS = ['.zip']

	    def _extract(self):
	        """Extract every member of the zip file into the current directory.

	        Raise ``InvalidFile`` when ``zipfile`` raises ``BadZipfile`` or
	        ``LargeZipFile``.
	        """
	        try:
	            f = zipfile.ZipFile(self.filename, 'r')
	            try:
	                f.extractall()
	            finally:
	                f.close()
	        # ``as`` binding and call-style raise work on Python 2.6+ and 3.
	        except (zipfile.BadZipfile, zipfile.LargeZipFile) as e:
	            raise InvalidFile(e)

	class TarredFile(CompressedFile):
	    """Shared extraction logic for tar archives.

	    Subclasses supply the ``tarfile.open`` mode through ``_get_mode``.
	    """

	    def _extract(self):
	        """Extract every member of the tarball into the current directory.

	        Raise ``InvalidFile`` on any ``tarfile.TarError``.

	        NOTE(review): ``extractall`` is called without any member path
	        checks -- confirm archives come from a trusted source.
	        """
	        try:
	            f = tarfile.open(self.filename, self._get_mode())
	            try:
	                self._ensure_read_access(f)
	                f.extractall()
	            finally:
	                f.close()
	        # ``as`` binding and call-style raise work on Python 2.6+ and 3.
	        except tarfile.TarError as e:
	            raise InvalidFile(e)

	    def _get_mode(self):
	        """Return the mode for this tarfile"""
	        raise NotImplementedError

	    def _ensure_read_access(self, tarfileobj):
	        """Ensure that the given tarfile will be readable by the user after
	        extraction.

	        Some tarballs have u-x set on directories. They may as well have
	        u-r set on files. We reset such perms here.. so that the extracted
	        files remain accessible. Owner bits are only ever added (OR-ed
	        in), never cleared.

	        See also: http://bugs.python.org/issue6196
	        """
	        EXECUTE = 0o100  # u+x
	        READ = 0o400     # u+r
	        # A directory needs both bits to be listed and entered. A regular
	        # file only needs to be readable; it must not be made executable.
	        dir_perm = READ | EXECUTE
	        file_perm = READ

	        # WARNING: if the tarfile has a huge list of files, this could be a
	        # potential performance bottleneck.
	        for tarinfo in tarfileobj.getmembers():
	            tarinfo.mode |= (dir_perm if tarinfo.isdir() else file_perm)


	class GzipTarredFile(TarredFile):
	    """Tar archive compressed with gzip."""

	    EXTENSIONS = ['.tar.gz', '.tgz']

	    def _get_mode(self):
	        """Return the ``tarfile.open`` mode for reading gzip data."""
	        return 'r:gz'

	class Bzip2TarredFile(TarredFile):
	    """Tar archive compressed with bzip2."""

	    EXTENSIONS = ['.tar.bz2', '.tbz']

	    def _get_mode(self):
	        """Return the ``tarfile.open`` mode for reading bzip2 data."""
	        # 'r:gz2' is not a mode tarfile knows; bzip2 archives use 'r:bz2'.
	        return 'r:bz2'


	def _splitext(filename, level):
	"""Similar to os.path.splitext but split ``level`` number of tiles

	>>> splitext('foo.tar.gz', 2)
	'foo'
	>>> splitext('foo.zip', 1)
	'foo'
	"""
	name = filename
	for level_no in range(level):
	name = os.path.splitext(name)[0]
	return name