Skip to content

Instantly share code, notes, and snippets.

@allista
Forked from chipx86/streaming-tar.py
Last active July 15, 2018 18:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allista/d3dc1984212d3d033eb29c78914e5887 to your computer and use it in GitHub Desktop.
Sample code to build a tar chunk-by-chunk and stream it out all at once.
# Building a tar file chunk-by-chunk.
#
# This is a quick bit of sample code for streaming data to a tar file,
# building it piece-by-piece. The tarfile is built on-the-fly and streamed
# back out. This is useful for web applications that need to dynamically
# build a tar file without swamping the server.
import tarfile
from io import BytesIO
class FileStream:
    """A minimal write-only file-like object backed by an in-memory buffer.

    ``tarfile`` writes archive bytes into it; the caller periodically drains
    the accumulated bytes with :meth:`pop` so the archive can be streamed out
    piece-by-piece without ever holding the whole tarball in memory.
    """

    def __init__(self):
        self.buffer = BytesIO()   # bytes written since the last pop()
        self.offset = 0           # total bytes ever written (monotonic)

    def write(self, s):
        """Append bytes *s* to the buffer and advance the logical offset."""
        self.buffer.write(s)
        self.offset += len(s)

    def tell(self):
        # tarfile calls tell(); report the logical position across pops,
        # not the position inside the current (possibly drained) buffer.
        return self.offset

    def close(self):
        self.buffer.close()

    def pop(self):
        """Return everything written since the last pop and reset the buffer."""
        s = self.buffer.getvalue()
        self.buffer.close()
        self.buffer = BytesIO()
        return s

    @staticmethod
    def _split_every(n, data):
        """Yield successive slices of *data* at most *n* items long."""
        while data:
            yield data[:n]
            data = data[n:]

    @classmethod
    def yield_tar_gz(cls, file_data_iterable):
        """Build a gzipped tar archive incrementally and yield its bytes.

        Args:
            file_data_iterable: iterable of ``(filename, data)`` pairs, where
                ``data`` is ``str`` (encoded as UTF-8) or bytes-like.

        Yields:
            ``bytes`` chunks which, concatenated, form a valid ``.tar.gz``.

        Bug fixed vs. the original: the member size must be set on the
        ``TarInfo`` *before* the header is written.  The original called
        ``tar.addfile(tar_info)`` with ``size == 0`` and only incremented
        ``tar_info.size`` afterwards, producing headers that claimed
        zero-length members — the following data bytes were then misparsed
        as headers on extraction.
        """
        stream = cls()
        tar = tarfile.TarFile.open(mode='w|gz', fileobj=stream)
        for filename, data in file_data_iterable:
            # Normalize to bytes up front so the header can carry the size.
            bin_data = data.encode('utf8') if isinstance(data, str) else bytes(data)
            tar_info = tarfile.TarInfo(filename)
            tar_info.size = len(bin_data)
            tar.addfile(tar_info)  # no fileobj: writes the header only
            yield stream.pop()
            # Stream the member data directly, yielding after each block.
            for chunk in cls._split_every(tarfile.BLOCKSIZE, bin_data):
                tar.fileobj.write(chunk)
                yield stream.pop()
            # Pad the member to a whole number of 512-byte blocks and keep
            # the TarFile's internal offset in sync with what we wrote.
            blocks, remainder = divmod(tar_info.size, tarfile.BLOCKSIZE)
            if remainder > 0:
                tar.fileobj.write(tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
                yield stream.pop()
                blocks += 1
            tar.offset += blocks * tarfile.BLOCKSIZE
        tar.close()  # writes the end-of-archive blocks
        yield stream.pop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment