Created
March 8, 2012 14:55
-
-
Save dandrake/2001367 to your computer and use it in GitHub Desktop.
script to print out file sizes from duplicity difftar and sigtar files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
I use duplicity to back up my files, and cron sends me an email every | |
day with the output from the backup. Sometimes the size of an | |
incremental backup is unusually large, and I wonder what made it so big. | |
This script takes a .difftar.gpg or .sigtar.gpg file and prints the | |
names of the files in it, listed in decreasing order by size. There's no | |
error checking of any kind. Enjoy! | |
Usage: | |
duplicitysizes.py [.difftar.gpg or .sigtar.gpg files] | |
This code is public domain, although I would appreciate a mention if you | |
use this. | |
--Dan Drake (http://mathsci.kaist.ac.kr/~drake) | |
""" | |
import os | |
import shutil | |
import subprocess | |
import sys | |
import tarfile | |
import tempfile | |
def tarfile_to_sorted(fn):
    """
    Return the per-file sizes recorded in a duplicity tar archive.

    fn is the path of a tar file that tarfile.open() can read (i.e. an
    already-decrypted difftar or sigtar file).

    Returns a list of tuples (filename, kind, size), sorted in
    increasing order by size. "kind" is one of the directories duplicity
    uses in its difftar and sigtar files: "multivol_diff", "deleted",
    "snapshot", etc.

    Members whose kind starts with "multivol" have their last path
    component dropped (presumably the volume number -- confirm against
    duplicity's archive layout), and the sizes of all members that map
    to the same filename are added together. When several members share
    a filename, the reported kind is that of the last one seen.
    """
    d = {}
    # Use the archive as a context manager so the underlying file handle
    # is closed even if reading a member fails.
    with tarfile.open(fn) as tar:
        for f in tar.getmembers():
            try:
                kind, rest = f.name.split('/', 1)
            except ValueError:
                # sigtar files contain a file named 'snapshot', so there's
                # only one value in the split. We just ignore that.
                continue
            if kind.startswith('multivol'):
                name = rest.rsplit('/', 1)[0]
            else:
                name = rest
            # dict.get replaces the Python-2-only has_key() lookup; a
            # name seen for the first time starts from a size of zero.
            _, oldsize = d.get(name, (kind, 0))
            d[name] = (kind, oldsize + f.size)
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    return sorted(((k, v[0], v[1]) for k, v in d.items()),
                  key=lambda entry: entry[2])
if __name__ == '__main__':
    # For each archive named on the command line: copy it to a temporary
    # file, decrypt the copy with gpg, and print the contained file names
    # with their sizes, largest first.
    for fn_ in sys.argv[1:]:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('In file {}:'.format(fn_))
        fd, fn = tempfile.mkstemp(suffix='.tar.gpg')
        # mkstemp returns an open OS-level descriptor; close it right away
        # since the file is only accessed by path from here on.
        os.close(fd)
        # gpg --decrypt-files writes its output next to the input, with
        # the '.gpg' suffix stripped.
        decrypted = fn[:-4]
        try:
            shutil.copy(fn_, fn)
            subprocess.call(['gpg', '--decrypt-files', fn])
            if not os.path.exists(decrypted):
                # Decryption produced nothing; report it and move on
                # instead of failing later with an unrelated tarfile error.
                sys.stderr.write(
                    'gpg did not produce a decrypted file for {}\n'.format(fn_))
                continue
            for f, t, s in reversed(tarfile_to_sorted(decrypted)):
                print('{:,} {} ({})'.format(s, f, t))
        finally:
            # Always remove the temporary copies, even when copying,
            # decrypting or parsing failed part-way through.
            for path in (decrypted, fn):
                if os.path.exists(path):
                    os.remove(path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment