Skip to content

Instantly share code, notes, and snippets.

Created July 29, 2016 09:49
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Python ZIP streaming
import struct
import zipfile
import time
import os
from binascii import crc32
def commonprefix(m):
    """Return the longest common leading prefix of the sequences in *m*.

    The comparison is element-wise, so *m* may hold strings (the result is
    a character prefix) or lists of path components (the result is a list
    of the shared leading components).  An empty *m* yields ``''``.
    """
    if not m:
        return ''
    # Every item sorts between min(m) and max(m), so whatever prefix those
    # two extremes share is shared by all the items in between.
    s1 = min(m)
    s2 = max(m)
    for i, c in enumerate(s1):
        if c != s2[i]:
            return s1[:i]
    return s1
def relpath(path, start=os.path.curdir):
    """Return a relative version of *path*, as seen from *start*.

    Both arguments are made absolute first, so relative inputs are
    resolved against the current working directory.

    Raises:
        ValueError: if *path* is empty.
    """
    if not path:
        raise ValueError("no path specified")

    # Split both absolute paths into their non-empty components.
    start_list = [x for x in os.path.abspath(start).split(os.path.sep) if x]
    path_list = [x for x in os.path.abspath(path).split(os.path.sep) if x]

    # Number of leading components the two paths have in common.
    i = len(commonprefix([start_list, path_list]))

    # Climb out of what is left of `start`, then descend into the rest
    # of `path`.
    rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
    if not rel_list:
        return os.path.curdir
    return os.path.join(*rel_list)
class ZipBuffer(object):
    """ A file-like object for zipfile.ZipFile to write into.

    Written chunks are collected in a list instead of being concatenated,
    so a consumer can drain them with get_and_clear() while the archive is
    still being produced.  tell() keeps counting across drains.
    """

    def __init__(self):
        # NOTE(review): the attribute name was lost where this line was
        # truncated in the source text; `data` is a reconstruction.
        self.data = []
        self.pos = 0

    def write(self, data):
        """Queue one chunk and advance the stream position by its length."""
        self.data.append(data)
        self.pos += len(data)

    def tell(self):
        # zipfile calls this so we need it
        return self.pos

    def flush(self):
        # zipfile calls this so we need it
        pass

    def get_and_clear(self):
        """Return the list of chunks written since the last call and reset it."""
        result = self.data
        self.data = []
        return result
class XZipFile(zipfile.ZipFile):
    """ZipFile that can add a file to the archive one chunk at a time."""

    def write_streaming(self, zinfo_or_arcname, filename, compress_type=None):
        """Store the file at 'filename' in the archive, chunk by chunk.

        'zinfo_or_arcname' is either a ZipInfo instance or the name of
        the file in the archive.  'compress_type' must be None: the data
        is always written uncompressed.

        This is a generator.  It yields None after each chunk of file
        data has been written to the underlying stream, so the caller can
        drain that stream between steps.  Nothing is written (and no
        error is raised) until the generator is first advanced.

        Sizes and CRC are packed as 32-bit values in the trailing data
        descriptor, so a single file of 4 GiB or more is not supported.

        Raises RuntimeError if the archive is already closed or if
        compression is requested.
        """
        if not isinstance(zinfo_or_arcname, zipfile.ZipInfo):
            zinfo = zipfile.ZipInfo(filename=zinfo_or_arcname,
                                    date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            # Bit 3: CRC and sizes are not known when the header is
            # written; they follow the file data in a data descriptor.
            zinfo.flag_bits = 0x08
            zinfo.external_attr = 0o600 << 16  # permission bits rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        if compress_type is not None:
            raise RuntimeError("Compression not supported!")
        if zinfo.compress_type != zipfile.ZIP_STORED:
            # The bytes below are copied verbatim, so a header announcing
            # any other method would describe data that is not there.
            raise RuntimeError("Compression not supported!")

        # Placeholders; the real values are filled in once the data has
        # been read.
        zinfo.CRC = CRC = 0
        zinfo.compress_size = 0
        zinfo.file_size = file_size = 0
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True
        self.fp.write(zinfo.FileHeader())

        with open(filename, 'rb') as fp:
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                self.fp.write(buf)
                yield None

        zinfo.CRC = CRC
        zinfo.file_size = file_size
        zinfo.compress_size = file_size

        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.fp.flush()
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

        # Python 3's ZipFile.close() writes the central directory at
        # self.start_dir, so keep it pointing past the entry just written.
        # Where ZipFile does not consult it, this is a harmless attribute.
        self.start_dir = self.fp.tell()
class FileGenerator(object):
    """Read-only, file-like wrapper around the streamed ZIP of a folder."""

    def __init__(self, src_dir, passes_filters):
        # NOTE(review): the attribute name was lost where this line was
        # truncated in the source text; `name` is a guess -- confirm
        # against callers.
        # NOTE(review): the suffix has no dot ('foo' -> 'foozip'); this
        # looks like it was meant to be '.zip' -- confirm before changing.
        self.name = os.path.basename(src_dir.rstrip('/')) + 'zip'
        self.generator = _stream_folder(src_dir, passes_filters)

    def read(self, n):
        """Return the next non-empty chunk of archive data.

        'n' is accepted for file-like compatibility but is not used: a
        whole chunk is returned whatever its size.  Once the archive is
        exhausted every call returns an empty byte string.
        """
        try:
            x = next(self.generator)
            # An empty chunk would look like end-of-file to the reader,
            # so skip ahead to the next chunk that carries data.
            while not len(x):
                x = next(self.generator)
            return x
        except StopIteration:
            return b''
def stream_folder(src_dir, passes_filters):
    """Return a FileGenerator that streams 'src_dir' as a ZIP archive.

    'passes_filters' is called with each file path found under
    'src_dir'; only the files for which it returns a true value are
    added to the archive.
    """
    return FileGenerator(src_dir, passes_filters)
def _stream_folder(src_dir, passes_filters):
    """Generate the bytes of an uncompressed ZIP of 'src_dir', in chunks.

    Walks 'src_dir' recursively and adds every file whose path makes
    'passes_filters(path)' true, under its path relative to 'src_dir'
    with '/' as the separator.  Chunks are yielded as soon as the archive
    writer has produced them, so the archive is never held in memory as
    a whole.
    """
    try:
        to_text = unicode   # Python 2: makes os.walk return unicode names
    except NameError:
        to_text = str       # Python 3: str is already text

    sink = ZipBuffer()
    archive = XZipFile(sink, mode='w', compression=zipfile.ZIP_STORED,
                       allowZip64=True)
    for root, _, files in os.walk(to_text(src_dir)):
        for f in files:
            path = os.path.join(root, f)
            if passes_filters(path):
                rel_path = relpath(path, src_dir).replace(os.path.sep, '/')
                # Each step of write_streaming() leaves fresh bytes in the
                # sink; hand them on before asking for the next step.
                for _ in archive.write_streaming(rel_path, path):
                    for chunk in sink.get_and_clear():
                        yield chunk
    # close() generates some more data, so we yield that too
    archive.close()
    for chunk in sink.get_and_clear():
        yield chunk
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment