Skip to content

Instantly share code, notes, and snippets.

@ahebrank
Last active August 29, 2015 14:00
Show Gist options
  • Save ahebrank/6f66aca8d3a6645bf0fe to your computer and use it in GitHub Desktop.
Save ahebrank/6f66aca8d3a6645bf0fe to your computer and use it in GitHub Desktop.
Sort du -sh output by size
import re
import sys
# Two command-line arguments are required: the saved du listing to read and
# the CSV file to write.
if len(sys.argv) < 3:
    # Call-style print emits the same text on Python 2 and, unlike the bare
    # print statement, also parses on Python 3.
    print("Need an input filename (i.e., du -sh > input.txt) and a csv output filename")
    sys.exit(1)
# pass a file with du -h output as the first command line argument
with open(sys.argv[1]) as f:
    # Iterate the file directly; each line is stripped of surrounding
    # whitespace (including the trailing newline).
    lines = [x.strip() for x in f]

# du output is two columns: filesize, filename.
# Raw string literals keep the regex backslashes out of Python's own escape
# processing; the compiled pattern is unchanged.
r = re.compile(r"^([\d\.]+[TGMK])\s+(.+)$")
matches = [r.search(x) for x in lines]
# Lines that do not look like "<number><T|G|M|K> <name>" are dropped here.
items = [m.groups() for m in matches if m is not None]

# split the filesize into a number and the K, M, G, etc. multiplier
# (`r` is deliberately rebound: later code reads this second pattern).
r = re.compile(r"([\d\.]+)(\w)")
# make a sorter that recognizes the size suffixes
def gen_comparator(base=1024):
    """Build a cmp-style comparator that orders du rows by their size column.

    Each row is a sequence whose first element is a human-readable size
    string such as ``"4.0K"`` or ``"1.2G"``; any further elements are
    ignored by the comparison.

    Args:
        base: Step between successive unit suffixes.  The default of 1024
            matches the binary units printed by ``du -h``, so ``1023K``
            correctly sorts below ``1.0M``.  Pass 1000 for listings that
            use decimal (SI) units.

    Returns:
        A function ``compare(x, y)`` returning -1, 0 or 1 when the size of
        ``x`` is smaller than, equal to, or larger than the size of ``y``.

    Raises:
        ValueError: (from the returned comparator) when a size string
            contains no parseable number.
    """
    # Private pattern: a number followed by an optional unit letter.  Keeping
    # it local means the comparator does not depend on module-level state.
    size_pattern = re.compile(r"([\d.]+)([A-Za-z]?)")
    # Power of `base` for each suffix; anything else (no suffix, "B", ...)
    # counts as plain bytes.
    exponents = {"K": 1, "M": 2, "G": 3, "T": 4, "P": 5}

    def expand(size):
        """Convert a size string such as "1.5G" into a float byte count."""
        found = size_pattern.search(size)
        if found is None:
            raise ValueError("unrecognized size: %r" % (size,))
        number, suffix = found.groups()
        return float(number) * base ** exponents.get(suffix.upper(), 0)

    def compare(x, y):
        """Three-way comparison of two rows by their expanded sizes."""
        left = expand(x[0])
        right = expand(y[0])
        # bool arithmetic yields exactly -1, 0 or 1.
        return (left > right) - (left < right)

    return compare
# sort, largest first
import csv
import functools

# sorted() has no `cmp` argument on Python 3; cmp_to_key adapts the
# comparator into a key function and works on Python 2.7 as well.
sortedSizes = sorted(items, key=functools.cmp_to_key(gen_comparator()), reverse=True)

# output to csv: one (size, name) row per entry, path taken from the second
# command line argument
with open(sys.argv[2], 'w') as fout:
    writer = csv.writer(fout, delimiter=',')
    writer.writerows(sortedSizes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment