Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
# Written by Nuutti Kotivuori <>
# This work is free. You can redistribute it and/or modify it under the
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
import sys, yaml, tarfile
# TODO: xz support requires python 3.3, but may work
# TODO: detect source compression format and use that in output as well
# TODO: proper command line option parsing
# TODO: ar format parsing and generation, nested metadata
# struct format for one fixed-size 60-byte record ('=' selects standard
# sizes with no alignment padding): seven byte-string fields of 16, 12, 6,
# 6, 8, 10 and 2 bytes.
# NOTE(review): presumably the ar member header (name, mtime, uid, gid,
# mode, size, end magic) -- confirm against the ar format before use.
# Nothing visible in this file references it yet; see the ar TODO above.
AR_HEADER_FMT = '=16s12s6s6s8s10s2s'
# XXX: monkey patch tarfile class: GNU tar leaves devmajor and
# devminor as empty if file is not device, while tarfile puts in 7
# zeros (octal). This change makes sure the fields are empty in
# tarfile as well.
def fix_tar():
    """Monkey patch ``tarfile.TarInfo._create_header`` to match GNU tar.

    GNU tar leaves the devmajor and devminor header fields empty when the
    member is not a device, while the stock tarfile implementation fills
    them with seven octal zeros.  After this call, headers produced by
    tarfile leave both fields empty unless the member type is a character
    or block device.

    The replacement uses the two-argument ``stn(s, length)`` and
    three-argument ``itn(n, digits, format)`` helpers and joins the parts
    as a native ``str``, i.e. it targets the Python 2 tarfile module.
    """
    import struct
    from tarfile import (itn, stn, calc_chksums, POSIX_MAGIC, REGTYPE,
                         CHRTYPE, BLKTYPE, BLOCKSIZE)

    def _create_header_fixed(_, info, format):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.

        The first positional argument is ignored; presumably it receives
        the TarInfo instance, because this function is installed as a
        plain class attribute rather than a staticmethod.
        """
        is_device = info.get("type") in (CHRTYPE, BLKTYPE)
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum placeholder: must be exactly 8 bytes so the field
            # sits at offset 148 (BLOCKSIZE - 364), where the real
            # checksum is spliced in below.
            " " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", ""), 32),
            stn(info.get("gname", ""), 32),
            # The actual fix: only device members carry device numbers.
            itn(info.get("devmajor", 0), 8, format) if is_device else stn("", 8),
            itn(info.get("devminor", 0), 8, format) if is_device else stn("", 8),
            stn(info.get("prefix", ""), 155),
        ]

        # Pad the joined fields to a full block, then overwrite the first
        # 7 bytes of the checksum field with six octal digits and a NUL.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    tarfile.TarInfo._create_header = _create_header_fixed
def create_metadata(srctar):
    """Collect the header metadata of every member of a tar archive.

    srctar is the path of the archive; it is opened as a read-only stream
    with transparent compression detection (mode 'r|*').

    Returns a dict with three keys:
      format      -- the ``format`` attribute of the opened TarFile
      pax_headers -- the archive-level pax headers
      members     -- a list with one dict per member, in archive order,
                     holding name, size, mtime, mode, type, linkname,
                     uid, gid, uname, gname, devmajor, devminor and
                     pax_headers

    The member keys are exactly those read back by apply_metadata().
    """
    with tarfile.open(srctar, mode='r|*') as tar:
        members = []
        for info in tar:
            members.append(dict(name=info.name,
                                size=info.size,
                                mtime=info.mtime,
                                mode=info.mode,
                                type=info.type,
                                linkname=info.linkname,
                                uid=info.uid,
                                gid=info.gid,
                                uname=info.uname,
                                gname=info.gname,
                                devmajor=info.devmajor,
                                devminor=info.devminor,
                                pax_headers=info.pax_headers))
        # Read the archive-level attributes only after iterating, so any
        # global headers met while streaming have been processed.
        return dict(format=tar.format,
                    pax_headers=tar.pax_headers,
                    members=members)
def apply_metadata(metadata, srctar, dsttar):
    """Write a new tar archive whose headers come from metadata.

    metadata -- dict with 'format', 'pax_headers' and 'members' keys, as
                produced by create_metadata()
    srctar   -- path of the archive that supplies the file contents
                (opened with transparent compression detection)
    dsttar   -- path of the uncompressed tar stream to write

    Members are written in the order listed in metadata['members']; each
    one is looked up in the source archive by name.
    """
    fields = ('size', 'mtime', 'mode', 'type', 'linkname', 'uid', 'gid',
              'uname', 'gname', 'devmajor', 'devminor', 'pax_headers')
    # Open the source through the srctar parameter (not sys.argv) and make
    # sure it is closed again once the destination has been written.
    with tarfile.open(srctar, 'r:*') as src:
        with tarfile.open(dsttar, mode='w|', format=metadata['format'],
                          pax_headers=metadata['pax_headers']) as tar:
            for member in metadata['members']:
                info = tarfile.TarInfo(member['name'])
                for field in fields:
                    setattr(info, field, member[field])
                # Only regular files carry data.  Asking extractfile() for
                # a link would try to resolve its target inside the
                # archive and fail for dangling links.
                data = src.extractfile(member['name']) if info.isreg() else None
                tar.addfile(info, data)
def main():
    """Command line entry point.

    One argument (a tar file): print that archive's metadata as YAML on
    stdout.

    Two arguments (source tar, destination tar): read YAML metadata from
    stdin and write the destination archive from the source contents
    using that metadata.

    Any other number of arguments prints 'aiee'.
    """
    argc = len(sys.argv)
    if argc == 2:
        metadata = create_metadata(sys.argv[1])
        # Parenthesised single-argument print behaves the same as the
        # Python 2 print statement.
        print(yaml.dump(metadata, default_flow_style=False))
    elif argc == 3:
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # only feed it metadata from a trusted source.
        metadata = yaml.load(sys.stdin)
        # Install the GNU-tar-compatible header writer before any header
        # is generated; nothing else in view ever calls it.
        fix_tar()
        apply_metadata(metadata, sys.argv[1], sys.argv[2])
    else:
        print('aiee')
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    main()

This is awesome. In testing this, I found a related issue, which arguably would be within the scope of this tool: if the tar file is actually a tar.gz file, then the two archives might have different timestamp values in their GzipFile objects. It would be nice to have a way to remove or normalize that.

Additionally, it would be great to have a similar tool for the timestamp in the 'ar' header in the *.deb file. See also

Once we have those things done, we effectively have a script that can take two *.deb files, show these trivial metadata differences, and adjust the metadata so they are the same. This would be huge.

Also... how did you find out about the ReproducibleBuilds stuff? (-:

BTW, I strongly encourage you to "watch" the page!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment