Created
April 23, 2012 17:50
-
-
Save Nagyman/2472658 to your computer and use it in GitHub Desktop.
Python Script to Identify Git Commits with Files Over a Specific Size
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
'''
Modified version from:
http://stackoverflow.com/questions/298314/find-files-in-git-repo-over-x-megabytes-that-dont-exist-in-head

Examples:
    # Report files larger than 1 MB (1048576 bytes)
    python git-big.py 1048576

    # Show the output on the console and also log it to a file
    python git-big.py 1048576 | tee ~/big-commits.log
'''
import os, sys | |
def getOutput(cmd):
    '''Run a shell command and return everything it wrote to stdout.

    cmd is passed to os.popen as a single command string. The pipe is
    closed as soon as the output has been read, instead of being left
    open until the file object happens to be garbage-collected.
    '''
    with os.popen(cmd) as pipe:
        return pipe.read()
def convert_bytes(bytes):
    '''Format a byte count as a short string with two decimals and a unit.

    The argument is converted with float(), then scaled by the largest
    binary unit (T, G, M, K) it reaches; anything below 1024 is
    reported with the suffix "b".
    '''
    amount = float(bytes)
    # Largest unit first, so the first threshold reached is the one used.
    scales = (
        (1099511627776, '%.2fT'),
        (1073741824, '%.2fG'),
        (1048576, '%.2fM'),
        (1024, '%.2fK'),
    )
    for divisor, template in scales:
        if amount >= divisor:
            return template % (amount / divisor)
    return '%.2fb' % amount
def _parse_threshold(argv):
    '''Return the size threshold given on the command line, or None.

    None means the argument list is unusable (wrong number of arguments
    or a size that is not an integer) and the usage line should be shown.
    '''
    if len(argv) != 2:
        return None
    try:
        return int(argv[1])
    except ValueError:
        return None


def _parse_ls_tree_entry(entry):
    '''Split one record of `git ls-tree -z -r -l` into (size, path).

    A record is "<mode> <type> <object> <size>" followed by a TAB and the
    path. Splitting on the TAB first keeps paths that contain whitespace
    intact. Returns None for empty records and for entries whose size is
    reported as "-" (no blob size available, e.g. submodules).
    '''
    meta, tab, path = entry.partition('\t')
    fields = meta.split()
    if not tab or len(fields) < 4 or fields[3] == '-':
        return None
    return int(fields[3]), path


def _emit(text):
    '''Write text to stdout and flush immediately.

    Flushing after every write keeps results appearing as they are found
    (also when piped), without relying on an unbuffered text stream,
    which Python 3 does not allow.
    '''
    sys.stdout.write(text)
    sys.stdout.flush()


maxSize = _parse_threshold(sys.argv)
if maxSize is None:
    print("usage: %s size (bytes)" % sys.argv[0])
else:
    revisions = getOutput("git rev-list HEAD").split()
    total = 0
    paths = set()  # paths already reported, so each path is listed only once
    # Header and data rows share one layout so the columns line up.
    row = "%10s %-7s %s\n"
    separator = ('-' * 50) + "\n"
    _emit('\nCommits larger than %s:\n' % convert_bytes(maxSize))
    _emit(separator)
    _emit(row % ("Size", "Commit", "Path"))
    _emit(separator)
    for revision in revisions:
        entries = getOutput("git ls-tree -zrl %s" % revision).split('\0')
        for entry in entries:
            parsed = _parse_ls_tree_entry(entry)
            if parsed is None:
                continue
            size, path = parsed
            if size > maxSize and path not in paths:
                total += size
                paths.add(path)
                # Report the revision being scanned: the third ls-tree
                # field is the blob's object id, not a commit.
                _emit(row % (convert_bytes(size), revision[:7], path))
    # Show the total
    _emit(separator)
    _emit('%10s\n' % convert_bytes(total))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment