Skip to content

Instantly share code, notes, and snippets.

@vgmoose
Last active August 29, 2015 14:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vgmoose/dbe6939b21b8368fa0e9 to your computer and use it in GitHub Desktop.
Save vgmoose/dbe6939b21b8368fa0e9 to your computer and use it in GitHub Desktop.
List the top 100 files larger than 300 MB, with their sizes, under / or a specified directory
import os
import functools
import sys
from heapq import *
from math import log
def pretty_size(n, pow=0, b=1024, u='B',
                pre=('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')):
    """Format a quantity as a human-readable string such as ``"1.5 MB"``.

    The value ``n * b ** pow`` is scaled down by the largest power of ``b``
    that does not exceed its magnitude (capped at the last entry of
    ``pre``).  Unscaled values are printed with no decimals, scaled values
    with exactly one decimal.

    Args:
        n: The quantity to format.
        pow: Exponent of ``b`` that ``n`` is already expressed in (``0``
            means plain units, ``1`` means ``n`` is in kilo-units, ...).
            The name shadows the builtin but is kept for keyword callers.
        b: Base of the unit system (1024 by default); expected to be > 1.
        u: Unit suffix appended after the prefix.
        pre: Sequence of unit prefixes, indexed by exponent.

    Returns:
        A string of the form ``"<number> <prefix><unit>"``.
    """
    value = n * b ** pow
    magnitude = abs(value)

    # Pick the exponent with exact comparisons instead of math.log():
    # a floating-point logarithm can land just below an integer at exact
    # powers of the base and select a prefix that is one step too small.
    exponent = 0
    largest = len(pre) - 1
    while exponent < largest and magnitude >= b ** (exponent + 1):
        exponent += 1

    # Plain units are shown without decimals; scaled units get one decimal.
    decimals = 1 if exponent else 0
    scaled = value / b ** float(exponent)
    return "%.*f %s%s" % (decimals, scaled, pre[exponent], u)
# --- Configuration and running totals ------------------------------------
# The directory to scan defaults to the filesystem root; the first
# command-line argument overrides it.
startpath = "/"
count = 0  # number of files visited, including ones that could not be stat'ed
tsize = 0  # combined size in bytes of every file that was stat'ed
if len(sys.argv) > 1:
    startpath = sys.argv[1]
data = []  # heap of (-size, path); negated sizes make the largest pop first
threshold = 300000000  # only files of at least 300,000,000 bytes are listed
top = 100  # maximum number of files shown in the final report

# --- Scan ----------------------------------------------------------------
for (path, dirs, files) in os.walk(startpath):
    for filename in files:
        fullpath = os.path.join(path, filename)
        try:
            size = os.stat(fullpath).st_size
        except OSError:
            # The file could not be stat'ed (e.g. permission denied or a
            # dangling symlink): still count it, but treat it as empty.
            size = 0
        tsize += size
        count += 1
        # Periodic progress line so long scans show signs of life.
        if count % 10000 == 0:
            print("scanned " + str(count) + " files, " + pretty_size(tsize)
                  + " so far (" + fullpath + ")")
        if size >= threshold:
            heappush(data, (-size, fullpath))

# --- Report --------------------------------------------------------------
# Pop at most `top` entries; the heap yields them largest-first.
for _ in range(min(top, len(data))):
    negated_size, fullpath = heappop(data)
    print(pretty_size(-negated_size) + ": " + fullpath)

print("\nTotal Files: " + str(count))
print("Total Size: " + pretty_size(tsize))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment