Skip to content
Create a gist now

Instantly share code, notes, and snippets.

# Written by Nuutti Kotivuori <>
# This work is free. You can redistribute it and/or modify it under the
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
import sys, yaml, tarfile
# TODO: xz support requires python 3.3, but may work
# TODO: detect source compression format and use that in output as well
# TODO: proper command line option parsing
# TODO: ar format parsing and generation, nested metadata
# struct format string for one fixed-width ar member header (60 bytes in
# total, native byte order without padding because of the leading '=').
# The widths look like the common ar layout -- name(16), mtime(12), uid(6),
# gid(6), mode(8), size(10), end marker(2) -- TODO confirm when the ar
# support mentioned in the TODO list is written; the code visible here does
# not reference this constant yet.
AR_HEADER_FMT = '=16s12s6s6s8s10s2s'
# XXX: monkey patch tarfile class: GNU tar leaves devmajor and
# devminor as empty if file is not device, while tarfile puts in 7
# zeros (octal). This change makes sure the fields are empty in
# tarfile as well.
def fix_tar():
    """Monkey patch ``tarfile.TarInfo._create_header`` to blank unused device fields.

    The replacement builds the same header block as before, except that the
    devmajor and devminor fields are written as empty (NUL-filled) fields
    unless the member is a character or block device. This mirrors what GNU
    tar emits, so regenerated archives can match GNU tar output.

    NOTE: the replacement is written against the Python 2 ``tarfile``
    helpers (``stn(s, length)`` and str-based header parts), in line with
    the rest of this script.
    """
    import struct
    from tarfile import itn, stn, calc_chksums, POSIX_MAGIC, REGTYPE, CHRTYPE, BLKTYPE, BLOCKSIZE

    def _create_header_fixed(_, info, format):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.

        The first parameter is unused; presumably it absorbs the TarInfo
        instance passed when the function is called as a method.
        """
        # Only character and block devices carry meaningful device numbers.
        is_device = info.get("type") in [CHRTYPE, BLKTYPE]
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field: eight placeholder spaces so that the field
            # sits 364 bytes from the end of the block, where the real
            # checksum is spliced in below.
            "        ",
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", ""), 32),
            stn(info.get("gname", ""), 32),
            itn(info.get("devmajor", 0), 8, format) if is_device else stn("", 8),
            itn(info.get("devminor", 0), 8, format) if is_device else stn("", 8),
            stn(info.get("prefix", ""), 155)
        ]

        # Pad the joined fields to a full block, then overwrite the first
        # seven bytes of the checksum field with the computed checksum.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    tarfile.TarInfo._create_header = _create_header_fixed
def create_metadata(srctar):
    """Collect the archive-level and per-member metadata of a tar file.

    srctar is the path of the archive; it is read as a stream with
    transparent compression detection (mode ``'r|*'``).

    Returns a dict with the keys ``format``, ``pax_headers`` and
    ``members``. ``members`` is a list with one dict per archive member, in
    archive order, holding exactly the fields that apply_metadata() reads
    back: name, size, mtime, mode, type, linkname, uid, gid, uname, gname,
    devmajor, devminor and pax_headers.
    """
    with tarfile.open(srctar, mode='r|*') as tar:
        members = []
        for info in tar:
            members.append(dict(
                name=info.name,
                size=info.size,
                mtime=info.mtime,
                mode=info.mode,
                type=info.type,
                linkname=info.linkname,
                uid=info.uid,
                gid=info.gid,
                uname=info.uname,
                gname=info.gname,
                devmajor=info.devmajor,
                devminor=info.devminor,
                pax_headers=info.pax_headers,
            ))
        return dict(format=tar.format,
                    pax_headers=tar.pax_headers,
                    members=members)
def apply_metadata(metadata, srctar, dsttar):
    """Write a copy of srctar to dsttar using the headers described by metadata.

    metadata is a dict shaped like the result of create_metadata(): the
    keys ``format`` and ``pax_headers`` configure the output archive, and
    ``members`` lists, in output order, one dict per member with the keys
    name, size, mtime, mode, type, linkname, uid, gid, uname, gname,
    devmajor, devminor and pax_headers.

    srctar is the path of the archive that supplies the file contents
    (looked up by member name); dsttar is the path of the uncompressed
    archive to create. The source is opened from the srctar argument and
    both archives are closed on exit, including on error.
    """
    header_fields = ('size', 'mtime', 'mode', 'type', 'linkname', 'uid',
                     'gid', 'uname', 'gname', 'devmajor', 'devminor',
                     'pax_headers')
    with tarfile.open(srctar, 'r:*') as src:
        with tarfile.open(dsttar, mode='w|', format=metadata['format'],
                          pax_headers=metadata['pax_headers']) as tar:
            for member in metadata['members']:
                info = tarfile.TarInfo(member['name'])
                for field in header_fields:
                    setattr(info, field, member[field])
                # Content comes from the source member with the same name.
                tar.addfile(info, src.extractfile(member['name']))
def main():
    """Command line entry point, dispatching on the number of arguments.

    With one argument (a tar path), dump that archive's metadata to stdout
    as block-style YAML. With two arguments (source tar, destination tar),
    read YAML metadata from stdin and write the destination archive with
    that metadata applied. With any other argument count, print 'aiee'.
    """
    if len(sys.argv) == 2:
        metadata = create_metadata(sys.argv[1])
        # Single-argument call form prints the same text on Python 2 and 3.
        print(yaml.dump(metadata, default_flow_style=False))
    elif len(sys.argv) == 3:
        # NOTE(security): yaml.load can construct arbitrary Python objects,
        # so only feed this metadata from a trusted source. It is left in
        # place because the dump above may contain Python-specific tags
        # that yaml.safe_load would reject -- TODO confirm and switch.
        metadata = yaml.load(sys.stdin)
        apply_metadata(metadata, sys.argv[1], sys.argv[2])
    else:
        print('aiee')
if __name__ == '__main__':
    # Install the tarfile header patch first so that any archive written by
    # main() uses the patched header layout.
    fix_tar()
    main()

This is awesome. In testing this, I found a related issue which is arguably within the scope of this tool: if the tar file is actually a tar.gz file, the two archives might also have different timestamp values in their GzipFile objects. It would be nice to have a way to remove or normalize that as well.

Additionally, it would be great to have a similar tool for the timestamp in the 'ar' header in the *.deb file. See also

Once we have those things done, we effectively have a script that can take two *.deb files, show these trivial metadata differences, and adjust the metadata so they are the same. This would be huge.


Also... how did you find out about the ReproducibleBuilds stuff? (-:


BTW, I strongly encourage you to "watch" the page!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.