Last active
August 29, 2015 14:25
-
-
Save vgmoose/dbe6939b21b8368fa0e9 to your computer and use it in GitHub Desktop.
List top 20 files + sizes over 300MB in / or specified directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import functools | |
import sys | |
from heapq import * | |
from math import log | |
def pretty_size(n, pow=0, b=1024, u='B',
                pre=('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')):
    """Format a quantity as a human-readable size string such as "1.5 KB".

    Parameters:
        n:   the quantity to format.
        pow: power of ``b`` that ``n`` is already expressed in (0 means ``n``
             is in base units, 1 means ``n`` counts units of ``b``, ...).
        b:   the unit base (1024 for binary units, 1000 for decimal ones).
        u:   unit suffix appended after the prefix.
        pre: sequence of unit prefixes, smallest first; the largest prefix is
             used for every value beyond the end of the sequence.

    Returns:
        The value scaled to the largest prefix that keeps it >= 1, printed
        with no decimals for the base unit and one decimal otherwise.
        Values below ``b`` (including zero and negatives) stay in base units.

    Raises:
        ValueError: if ``b`` is not greater than 1.
    """
    if b <= 1:
        raise ValueError("b must be greater than 1")

    value = n * b ** pow

    # Pick the prefix with exact comparisons rather than int(log(value, b)):
    # a floating-point logarithm can land just below an integer at an exact
    # power of the base and select a prefix one step too small.
    exponent = 0
    largest = len(pre) - 1
    while exponent < largest and value >= b ** (exponent + 1):
        exponent += 1

    # Base units are printed as whole numbers; scaled units get one decimal.
    precision = 1 if exponent else 0
    return "%.*f %s%s" % (precision, value / b ** float(exponent),
                          pre[exponent], u)
# Files at least this many bytes large are listed (300 MB, decimal).
threshold = 300000000
# Maximum number of largest files to list.
top = 100


def scan_tree(startpath, min_size, progress_every=10000):
    """Walk ``startpath`` and measure every file found beneath it.

    Parameters:
        startpath:      directory at which the walk starts.
        min_size:       files whose size in bytes is >= this are collected.
        progress_every: print a progress line each time this many files
                        have been scanned.

    Returns:
        A tuple ``(count, total_size, large)`` where ``count`` is the number
        of files seen, ``total_size`` is the sum of their sizes in bytes and
        ``large`` is a list of ``(size, path)`` pairs for the collected files.
        Files that cannot be stat'ed are counted with a size of 0.
    """
    count = 0
    total_size = 0
    large = []
    for path, dirs, files in os.walk(startpath):
        for filename in files:
            fullpath = os.path.join(path, filename)
            try:
                size = os.stat(fullpath).st_size
            except OSError:
                # Best effort: unreadable entries (broken symlinks, permission
                # errors, files removed mid-scan) count as empty files.
                size = 0
            total_size += size
            count += 1
            if count % progress_every == 0:
                print("scanned " + str(count) + " files, "
                      + pretty_size(total_size) + " so far (" + fullpath + ")")
            if size >= min_size:
                large.append((size, fullpath))
    return count, total_size, large


def main(argv=None):
    """Report the largest files under "/" or under the directory in argv[1].

    Prints up to ``top`` files of at least ``threshold`` bytes, largest
    first (ties ordered by path), followed by the total file count and size.
    """
    if argv is None:
        argv = sys.argv
    startpath = argv[1] if len(argv) > 1 else "/"

    count, total_size, large = scan_tree(startpath, threshold)

    ranked = sorted(large, key=lambda entry: (-entry[0], entry[1]))
    for size, filepath in ranked[:top]:
        print(pretty_size(size) + ": " + filepath)

    print("\nTotal Files: " + str(count))
    print("Total Size: " + pretty_size(total_size))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment