Skip to content

Instantly share code, notes, and snippets.

@Nagyman
Created April 23, 2012 17:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Nagyman/2472658 to your computer and use it in GitHub Desktop.
Save Nagyman/2472658 to your computer and use it in GitHub Desktop.
Python Script to Identify Git Commits with Files Over a Specific Size
#!/usr/bin/env python
'''
Modified version from:
http://stackoverflow.com/questions/298314/find-files-in-git-repo-over-x-megabytes-that-dont-exist-in-head
Examples:
# Commits bigger than 1MB
python git-big.py 1048576
# Output visible on console and logged to file
python git-big.py 1048576 | tee ~/big-commits.log
'''
import os, sys
def getOutput(cmd):
    """Run *cmd* through the shell and return what it wrote to stdout.

    Args:
        cmd: Shell command line, handed unchanged to ``os.popen``.

    Returns:
        The command's complete standard output as one string.
    """
    pipe = os.popen(cmd)
    try:
        return pipe.read()
    finally:
        # Close explicitly: this is called once per revision, so relying on
        # garbage collection could pile up open pipes and unreaped children.
        pipe.close()
def convert_bytes(bytes):
    """Format a byte count as a short human-readable string.

    The value is scaled by the largest binary unit (1024-based) that it
    reaches and rendered with two decimals and a one-letter suffix:
    ``T``, ``G``, ``M``, ``K``, or ``b`` for values below 1024.

    Args:
        bytes: Size in bytes; anything ``float()`` accepts (int, float,
            numeric string). The name shadows the builtin but is kept so
            existing keyword callers continue to work.

    Returns:
        A string such as ``'1.50K'`` or ``'512.00b'``.
    """
    amount = float(bytes)
    # Largest unit first so the first threshold reached wins.
    units = (
        (1024.0 ** 4, 'T'),
        (1024.0 ** 3, 'G'),
        (1024.0 ** 2, 'M'),
        (1024.0, 'K'),
    )
    for factor, suffix in units:
        if amount >= factor:
            return '%.2f%s' % (amount / factor, suffix)
    return '%.2fb' % amount
def find_big_files(revisions, list_tree, max_size):
    """Yield ``(size, revision, path)`` for each path larger than *max_size*.

    Args:
        revisions: Iterable of revision ids, visited in the given order.
        list_tree: Callable taking a revision id and returning the raw
            output of ``git ls-tree -zrl <revision>`` (NUL-separated records).
        max_size: Threshold in bytes; only strictly larger blobs are reported.

    Yields:
        One tuple per distinct path, for the first revision (in iteration
        order) in which that path exceeds *max_size*.
    """
    reported = set()
    for revision in revisions:
        for entry in list_tree(revision).split('\0'):
            if not entry:
                continue
            # Record layout: "<mode> <type> <object> <size>\t<path>".
            # Split at the tab first so paths containing spaces stay intact.
            meta, tab, path = entry.partition('\t')
            fields = meta.split()
            if not tab or len(fields) != 4:
                continue
            # Non-blob entries (e.g. submodules) report "-" instead of a size.
            if fields[3] == '-':
                continue
            size = int(fields[3])
            if size > max_size and path not in reported:
                reported.add(path)
                yield size, revision, path


def main(argv):
    """Print every file in HEAD's history that is larger than ``argv[1]`` bytes.

    Args:
        argv: Command-line arguments, program name first.

    Returns:
        Process exit status: 0 on success, 1 when the arguments are invalid.
    """
    usage = "usage: %s size (bytes)\n" % argv[0]
    if len(argv) != 2:
        sys.stdout.write(usage)
        return 1
    try:
        max_size = int(argv[1])
    except ValueError:
        sys.stdout.write(usage)
        return 1

    def emit(text):
        # Flush after each write so results show up immediately, even when
        # stdout is a pipe (e.g. "| tee big-commits.log").
        sys.stdout.write(text)
        sys.stdout.flush()

    def list_tree(revision):
        return getOutput("git ls-tree -zrl %s" % revision)

    revisions = getOutput("git rev-list HEAD").split()
    separator = ('-' * 50) + "\n"

    emit('\nCommits larger than %s:\n' % convert_bytes(max_size))
    emit(separator)
    emit(" Size Commit Path\n")
    emit(separator)

    total = 0
    for size, revision, path in find_big_files(revisions, list_tree, max_size):
        total += size
        # The "Commit" column shows the revision containing the file, not the
        # blob object id that ls-tree prints in its third column.
        emit("%10s %s %s\n" % (convert_bytes(size), revision[:7], path))

    # Show the total
    emit(separator)
    emit('%10s\n' % convert_bytes(total))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment