Created
August 22, 2013 20:38
-
-
Save nakedible/6312482 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Written by Nuutti Kotivuori <naked@iki.fi> | |
# | |
# This work is free. You can redistribute it and/or modify it under the | |
# terms of the Do What The Fuck You Want To Public License, Version 2, | |
# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details. | |
import sys, yaml, tarfile | |
# TODO: xz support requires python 3.3, but may work | |
# TODO: detect source compression format and use that in output as well | |
# TODO: proper command line option parsing | |
# TODO: ar format parsing and generation, nested metadata | |
# struct format for one fixed-size ar member header: seven string fields
# totalling 60 bytes. Presumably name, mtime, uid, gid, mode, size and the
# end-of-header magic, per the common ar layout -- TODO confirm.
# Not referenced by the code visible in this file (see the ar TODO above).
AR_HEADER_FMT = '=16s12s6s6s8s10s2s'
# XXX: monkey patch tarfile class: GNU tar leaves devmajor and | |
# devminor as empty if file is not device, while tarfile puts in 7 | |
# zeros (octal). This change makes sure the fields are empty in | |
# tarfile as well. | |
def fix_tar():
    """Monkey patch ``tarfile.TarInfo._create_header`` to mimic GNU tar.

    GNU tar leaves the devmajor and devminor header fields empty when the
    member is not a device, while tarfile writes seven octal zeros there.
    The replacement installed here emits empty fields for every member
    that is not a character or block device.

    NOTE(review): the replacement is written against the Python 2 tarfile
    helpers (two-argument ``stn``, ``str`` header fields); confirm the
    helper signatures before running it on another interpreter.
    """
    import struct
    from tarfile import (itn, stn, calc_chksums, POSIX_MAGIC, REGTYPE,
                         CHRTYPE, BLKTYPE, BLOCKSIZE)

    def _create_header_fixed(_, info, format):
        """Return a header block.

        ``info`` is a dictionary with file information, ``format`` must be
        one of the ``*_FORMAT`` constants. The first argument is the
        instance the patched method is bound to and is ignored.
        """
        if info.get("type") in (CHRTYPE, BLKTYPE):
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            # Not a device: leave both fields empty, as GNU tar does.
            devmajor = stn("", 8)
            devminor = stn("", 8)

        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum placeholder. It must be exactly 8 bytes wide so the
            # fields after it keep their offsets; spelled as a repeat so
            # whitespace collapsing cannot silently shrink it.
            " " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", ""), 32),
            stn(info.get("gname", ""), 32),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155),
        ]

        # Pad the joined fields with NULs up to one full block.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum field (six octal
        # digits plus NUL); the 8th byte stays a space.
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    tarfile.TarInfo._create_header = _create_header_fixed


fix_tar()
def create_metadata(srctar):
    """Read the tar archive *srctar* and return its header metadata.

    The archive is opened as a stream with transparent compression
    detection. The result is a dictionary with the archive's ``format``
    and ``pax_headers`` plus ``members``, a list holding one dictionary of
    header fields per member, in archive order.
    """
    header_fields = (
        'name', 'size', 'mtime', 'mode', 'type', 'linkname',
        'uid', 'gid', 'uname', 'gname', 'devmajor', 'devminor',
        'pax_headers',
    )
    with tarfile.open(srctar, mode='r|*') as archive:
        entries = [
            {field: getattr(entry, field) for field in header_fields}
            for entry in archive
        ]
        return {
            'format': archive.format,
            'pax_headers': archive.pax_headers,
            'members': entries,
        }
def apply_metadata(metadata, srctar, dsttar):
    """Write *dsttar* with file data from *srctar* and headers from *metadata*.

    *metadata* is a dictionary shaped like the one create_metadata
    returns: ``format`` and ``pax_headers`` for the archive, and
    ``members``, a list of per-member header dictionaries. Members are
    written in the order they are listed, each with the data stored under
    the same name in *srctar*.

    The source archive is the *srctar* argument (not ``sys.argv``) and is
    closed again when the function returns or raises.
    """
    header_fields = (
        'size', 'mtime', 'mode', 'type', 'linkname', 'uid', 'gid',
        'uname', 'gname', 'devmajor', 'devminor', 'pax_headers',
    )
    with tarfile.open(srctar, 'r:*') as src:
        with tarfile.open(name=dsttar, mode='w|',
                          format=metadata['format'],
                          pax_headers=metadata['pax_headers']) as tar:
            for member in metadata['members']:
                info = tarfile.TarInfo(member['name'])
                for field in header_fields:
                    setattr(info, field, member[field])
                # Data is looked up by member name in the source archive.
                tar.addfile(info, src.extractfile(member['name']))
def main():
    """Command line entry point.

    With one argument, print the metadata of that tar archive to stdout
    as YAML. With two arguments, read YAML metadata from stdin and write
    the second archive from the first archive's data and that metadata.
    Any other argument count prints a usage message to stderr and exits
    with status 2.
    """
    argc = len(sys.argv)
    if argc == 2:
        metadata = create_metadata(sys.argv[1])
        print(yaml.dump(metadata, default_flow_style=False))
    elif argc == 3:
        # NOTE(security): yaml.load can construct arbitrary Python objects,
        # so only feed this trusted metadata. Left as yaml.load because
        # documents produced by yaml.dump above may carry Python-specific
        # tags that safe_load would reject -- TODO confirm and switch.
        metadata = yaml.load(sys.stdin)
        apply_metadata(metadata, sys.argv[1], sys.argv[2])
    else:
        prog = sys.argv[0] if sys.argv else 'metatar.py'
        sys.stderr.write(
            'usage: %s SRC.tar > metadata.yaml\n'
            '       %s SRC.tar DST.tar < metadata.yaml\n' % (prog, prog))
        sys.exit(2)


if __name__ == '__main__':
    main()
Also... how did you find out about the ReproducibleBuilds stuff? (-:
BTW, I strongly encourage you to "watch" the https://wiki.debian.org/ReproducibleBuilds page!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is awesome. In testing this, I found a related issue, which arguably would be within scope of metatar.py: if the tar file is actually a tar.gz file, then they might have different timestamp values in the GzipFile objects. It would be nice to have a way to remove/normalize that.
Additionally, it would be great to have a similar tool for the timestamp in the 'ar' header in the *.deb file. See also http://en.wikipedia.org/wiki/Ar_%28Unix%29#File_format_details
Once we have those things done, we effectively have a script that can take two *.deb files, show these trivial metadata differences, and adjust the metadata so they are the same. This would be huge.